[Python-checkins] python/dist/src/Lib sre_compile.py,1.51,1.52

rhettinger at users.sourceforge.net rhettinger at users.sourceforge.net
Fri Mar 26 06:17:12 EST 2004


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32139

Modified Files:
	sre_compile.py 
Log Message:
Simple optimizations:
* pre-build a single identity function for the fixup function
* pre-build membership tests in dictionaries instead of in-line tuples
* assign len() to a local variable
* assign append() methods to a local variable
* use xrange() instead of range()
* replace "x<<1" with "x+x"



Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.51
retrieving revision 1.52
diff -C2 -d -r1.51 -r1.52
*** sre_compile.py	25 Mar 2004 13:50:59 -0000	1.51
--- sre_compile.py	26 Mar 2004 11:16:55 -0000	1.52
***************
*** 22,30 ****
      MAXCODE = 0xFFFFFFFFL
  
  def _compile(code, pattern, flags):
      # internal: compile a (sub)pattern
      emit = code.append
      for op, av in pattern:
!         if op in (LITERAL, NOT_LITERAL):
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
--- 22,44 ----
      MAXCODE = 0xFFFFFFFFL
  
+ def _identityfunction(x):
+     return x
+ 
+ # use xrange if available
+ try:
+     xrange
+ except NameError:
+     xrange = range
+ 
  def _compile(code, pattern, flags):
      # internal: compile a (sub)pattern
      emit = code.append
+     _len = len
+     LITERAL_CODES = {LITERAL:1, NOT_LITERAL:1}
+     REPEATING_CODES = {REPEAT:1, MIN_REPEAT:1, MAX_REPEAT:1}
+     SUCCESS_CODES = {SUCCESS:1, FAILURE:1}
+     ASSERT_CODES = {ASSERT:1, ASSERT_NOT:1}
      for op, av in pattern:
!         if op in LITERAL_CODES:
              if flags & SRE_FLAG_IGNORECASE:
                  emit(OPCODES[OP_IGNORE[op]])
***************
*** 40,47 ****
              else:
                  emit(OPCODES[op])
!                 fixup = lambda x: x
!             skip = len(code); emit(0)
              _compile_charset(av, flags, code, fixup)
!             code[skip] = len(code) - skip
          elif op is ANY:
              if flags & SRE_FLAG_DOTALL:
--- 54,61 ----
              else:
                  emit(OPCODES[op])
!                 fixup = _identityfunction
!             skip = _len(code); emit(0)
              _compile_charset(av, flags, code, fixup)
!             code[skip] = _len(code) - skip
          elif op is ANY:
              if flags & SRE_FLAG_DOTALL:
***************
*** 49,81 ****
              else:
                  emit(OPCODES[ANY])
!         elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
              if flags & SRE_FLAG_TEMPLATE:
                  raise error, "internal: unsupported template operator"
                  emit(OPCODES[REPEAT])
!                 skip = len(code); emit(0)
                  emit(av[0])
                  emit(av[1])
                  _compile(code, av[2], flags)
                  emit(OPCODES[SUCCESS])
!                 code[skip] = len(code) - skip
!             elif _simple(av) and op != REPEAT:
!                 if op == MAX_REPEAT:
                      emit(OPCODES[REPEAT_ONE])
                  else:
                      emit(OPCODES[MIN_REPEAT_ONE])
!                 skip = len(code); emit(0)
                  emit(av[0])
                  emit(av[1])
                  _compile(code, av[2], flags)
                  emit(OPCODES[SUCCESS])
!                 code[skip] = len(code) - skip
              else:
                  emit(OPCODES[REPEAT])
!                 skip = len(code); emit(0)
                  emit(av[0])
                  emit(av[1])
                  _compile(code, av[2], flags)
!                 code[skip] = len(code) - skip
!                 if op == MAX_REPEAT:
                      emit(OPCODES[MAX_UNTIL])
                  else:
--- 63,95 ----
              else:
                  emit(OPCODES[ANY])
!         elif op in REPEATING_CODES:
              if flags & SRE_FLAG_TEMPLATE:
                  raise error, "internal: unsupported template operator"
                  emit(OPCODES[REPEAT])
!                 skip = _len(code); emit(0)
                  emit(av[0])
                  emit(av[1])
                  _compile(code, av[2], flags)
                  emit(OPCODES[SUCCESS])
!                 code[skip] = _len(code) - skip
!             elif _simple(av) and op is not REPEAT:
!                 if op is MAX_REPEAT:
                      emit(OPCODES[REPEAT_ONE])
                  else:
                      emit(OPCODES[MIN_REPEAT_ONE])
!                 skip = _len(code); emit(0)
                  emit(av[0])
                  emit(av[1])
                  _compile(code, av[2], flags)
                  emit(OPCODES[SUCCESS])
!                 code[skip] = _len(code) - skip
              else:
                  emit(OPCODES[REPEAT])
!                 skip = _len(code); emit(0)
                  emit(av[0])
                  emit(av[1])
                  _compile(code, av[2], flags)
!                 code[skip] = _len(code) - skip
!                 if op is MAX_REPEAT:
                      emit(OPCODES[MAX_UNTIL])
                  else:
***************
*** 90,98 ****
                  emit(OPCODES[MARK])
                  emit((av[0]-1)*2+1)
!         elif op in (SUCCESS, FAILURE):
              emit(OPCODES[op])
!         elif op in (ASSERT, ASSERT_NOT):
              emit(OPCODES[op])
!             skip = len(code); emit(0)
              if av[0] >= 0:
                  emit(0) # look ahead
--- 104,112 ----
                  emit(OPCODES[MARK])
                  emit((av[0]-1)*2+1)
!         elif op in SUCCESS_CODES:
              emit(OPCODES[op])
!         elif op in ASSERT_CODES:
              emit(OPCODES[op])
!             skip = _len(code); emit(0)
              if av[0] >= 0:
                  emit(0) # look ahead
***************
*** 104,114 ****
              _compile(code, av[1], flags)
              emit(OPCODES[SUCCESS])
!             code[skip] = len(code) - skip
          elif op is CALL:
              emit(OPCODES[op])
!             skip = len(code); emit(0)
              _compile(code, av, flags)
              emit(OPCODES[SUCCESS])
!             code[skip] = len(code) - skip
          elif op is AT:
              emit(OPCODES[op])
--- 118,128 ----
              _compile(code, av[1], flags)
              emit(OPCODES[SUCCESS])
!             code[skip] = _len(code) - skip
          elif op is CALL:
              emit(OPCODES[op])
!             skip = _len(code); emit(0)
              _compile(code, av, flags)
              emit(OPCODES[SUCCESS])
!             code[skip] = _len(code) - skip
          elif op is AT:
              emit(OPCODES[op])
***************
*** 123,136 ****
              emit(OPCODES[op])
              tail = []
              for av in av[1]:
!                 skip = len(code); emit(0)
                  # _compile_info(code, av, flags)
                  _compile(code, av, flags)
                  emit(OPCODES[JUMP])
!                 tail.append(len(code)); emit(0)
!                 code[skip] = len(code) - skip
              emit(0) # end of branch
              for tail in tail:
!                 code[tail] = len(code) - tail
          elif op is CATEGORY:
              emit(OPCODES[op])
--- 137,151 ----
              emit(OPCODES[op])
              tail = []
+             tailappend = tail.append
              for av in av[1]:
!                 skip = _len(code); emit(0)
                  # _compile_info(code, av, flags)
                  _compile(code, av, flags)
                  emit(OPCODES[JUMP])
!                 tailappend(_len(code)); emit(0)
!                 code[skip] = _len(code) - skip
              emit(0) # end of branch
              for tail in tail:
!                 code[tail] = _len(code) - tail
          elif op is CATEGORY:
              emit(OPCODES[op])
***************
*** 149,162 ****
              emit(OPCODES[op])
              emit((av[0]-1)*2)
!             skipyes = len(code); emit(0)
              _compile(code, av[1], flags)
              if av[2]:
                  emit(OPCODES[JUMP])
!                 skipno = len(code); emit(0)
!                 code[skipyes] = len(code) - skipyes + 1
                  _compile(code, av[2], flags)
!                 code[skipno] = len(code) - skipno
              else:
!                 code[skipyes] = len(code) - skipyes + 1
          else:
              raise ValueError, ("unsupported operand type", op)
--- 164,177 ----
              emit(OPCODES[op])
              emit((av[0]-1)*2)
!             skipyes = _len(code); emit(0)
              _compile(code, av[1], flags)
              if av[2]:
                  emit(OPCODES[JUMP])
!                 skipno = _len(code); emit(0)
!                 code[skipyes] = _len(code) - skipyes + 1
                  _compile(code, av[2], flags)
!                 code[skipno] = _len(code) - skipno
              else:
!                 code[skipyes] = _len(code) - skipyes + 1
          else:
              raise ValueError, ("unsupported operand type", op)
***************
*** 166,170 ****
      emit = code.append
      if fixup is None:
!         fixup = lambda x: x
      for op, av in _optimize_charset(charset, fixup):
          emit(OPCODES[op])
--- 181,185 ----
      emit = code.append
      if fixup is None:
!         fixup = _identityfunction
      for op, av in _optimize_charset(charset, fixup):
          emit(OPCODES[op])
***************
*** 194,202 ****
      # internal: optimize character set
      out = []
      charmap = [False]*256
      try:
          for op, av in charset:
              if op is NEGATE:
!                 out.append((op, av))
              elif op is LITERAL:
                  charmap[fixup(av)] = True
--- 209,218 ----
      # internal: optimize character set
      out = []
+     outappend = out.append
      charmap = [False]*256
      try:
          for op, av in charset:
              if op is NEGATE:
!                 outappend((op, av))
              elif op is LITERAL:
                  charmap[fixup(av)] = True
***************
*** 213,216 ****
--- 229,233 ----
      i = p = n = 0
      runs = []
+     runsappend = runs.append
      for c in charmap:
          if c:
***************
*** 219,234 ****
              n = n + 1
          elif n:
!             runs.append((p, n))
              n = 0
          i = i + 1
      if n:
!         runs.append((p, n))
      if len(runs) <= 2:
          # use literal/range
          for p, n in runs:
              if n == 1:
!                 out.append((LITERAL, p))
              else:
!                 out.append((RANGE, (p, p+n-1)))
          if len(out) < len(charset):
              return out
--- 236,251 ----
              n = n + 1
          elif n:
!             runsappend((p, n))
              n = 0
          i = i + 1
      if n:
!         runsappend((p, n))
      if len(runs) <= 2:
          # use literal/range
          for p, n in runs:
              if n == 1:
!                 outappend((LITERAL, p))
              else:
!                 outappend((RANGE, (p, p+n-1)))
          if len(out) < len(charset):
              return out
***************
*** 236,240 ****
          # use bitmap
          data = _mk_bitmap(charmap)
!         out.append((CHARSET, data))
          return out
      return charset
--- 253,257 ----
          # use bitmap
          data = _mk_bitmap(charmap)
!         outappend((CHARSET, data))
          return out
      return charset
***************
*** 242,245 ****
--- 259,263 ----
  def _mk_bitmap(bits):
      data = []
+     dataappend = data.append
      if _sre.CODESIZE == 2:
          start = (1, 0)
***************
*** 250,256 ****
          if c:
              v = v + m
!         m = m << 1
          if m > MAXCODE:
!             data.append(v)
              m, v = start
      return data
--- 268,274 ----
          if c:
              v = v + m
!         m = m + m
          if m > MAXCODE:
!             dataappend(v)
              m, v = start
      return data
***************
*** 296,300 ****
                  charmap[fixup(av)] = True
              elif op is RANGE:
!                 for i in range(fixup(av[0]), fixup(av[1])+1):
                      charmap[i] = True
              elif op is CATEGORY:
--- 314,318 ----
                  charmap[fixup(av)] = True
              elif op is RANGE:
!                 for i in xrange(fixup(av[0]), fixup(av[1])+1):
                      charmap[i] = True
              elif op is CATEGORY:
***************
*** 308,312 ****
              # XXX: negation does not work with big charsets
              return charset
!         for i in range(65536):
              charmap[i] = not charmap[i]
      comps = {}
--- 326,330 ----
              # XXX: negation does not work with big charsets
              return charset
!         for i in xrange(65536):
              charmap[i] = not charmap[i]
      comps = {}
***************
*** 314,318 ****
      block = 0
      data = []
!     for i in range(256):
          chunk = tuple(charmap[i*256:(i+1)*256])
          new = comps.setdefault(chunk, block)
--- 332,336 ----
      block = 0
      data = []
!     for i in xrange(256):
          chunk = tuple(charmap[i*256:(i+1)*256])
          new = comps.setdefault(chunk, block)
***************
*** 349,354 ****
--- 367,374 ----
      # look for a literal prefix
      prefix = []
+     prefixappend = prefix.append
      prefix_skip = 0
      charset = [] # not used
+     charsetappend = charset.append
      if not (flags & SRE_FLAG_IGNORECASE):
          # look for literal prefix
***************
*** 357,365 ****
                  if len(prefix) == prefix_skip:
                      prefix_skip = prefix_skip + 1
!                 prefix.append(av)
              elif op is SUBPATTERN and len(av[1]) == 1:
                  op, av = av[1][0]
                  if op is LITERAL:
!                     prefix.append(av)
                  else:
                      break
--- 377,385 ----
                  if len(prefix) == prefix_skip:
                      prefix_skip = prefix_skip + 1
!                 prefixappend(av)
              elif op is SUBPATTERN and len(av[1]) == 1:
                  op, av = av[1][0]
                  if op is LITERAL:
!                     prefixappend(av)
                  else:
                      break
***************
*** 372,378 ****
                  op, av = av[1][0]
                  if op is LITERAL:
!                     charset.append((op, av))
                  elif op is BRANCH:
                      c = []
                      for p in av[1]:
                          if not p:
--- 392,399 ----
                  op, av = av[1][0]
                  if op is LITERAL:
!                     charsetappend((op, av))
                  elif op is BRANCH:
                      c = []
+                     cappend = c.append
                      for p in av[1]:
                          if not p:
***************
*** 380,384 ****
                          op, av = p[0]
                          if op is LITERAL:
!                             c.append((op, av))
                          else:
                              break
--- 401,405 ----
                          op, av = p[0]
                          if op is LITERAL:
!                             cappend((op, av))
                          else:
                              break
***************
*** 387,390 ****
--- 408,412 ----
              elif op is BRANCH:
                  c = []
+                 cappend = c.append
                  for p in av[1]:
                      if not p:
***************
*** 392,396 ****
                      op, av = p[0]
                      if op is LITERAL:
!                         c.append((op, av))
                      else:
                          break
--- 414,418 ----
                      op, av = p[0]
                      if op is LITERAL:
!                         cappend((op, av))
                      else:
                          break
***************
*** 433,437 ****
          # generate overlap table
          table = [-1] + ([0]*len(prefix))
!         for i in range(len(prefix)):
              table[i+1] = table[i]+1
              while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
--- 455,459 ----
          # generate overlap table
          table = [-1] + ([0]*len(prefix))
!         for i in xrange(len(prefix)):
              table[i+1] = table[i]+1
              while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:




More information about the Python-checkins mailing list