[Python-checkins] python/dist/src/Lib sre_parse.py,1.59,1.60

rhettinger at users.sourceforge.net rhettinger at users.sourceforge.net
Fri Mar 26 18:24:15 EST 2004


Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv22850

Modified Files:
	sre_parse.py 
Log Message:
Simple Optimizations:
* Factor constant expressions out of loops.
* Presize a list being grown to a known length.



Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.59
retrieving revision 1.60
diff -C2 -d -r1.59 -r1.60
*** sre_parse.py	18 Jan 2004 20:29:54 -0000	1.59
--- sre_parse.py	26 Mar 2004 23:24:00 -0000	1.60
***************
*** 104,107 ****
--- 104,108 ----
      def dump(self, level=0):
          nl = 1
+         seqtypes = type(()), type([])
          for op, av in self.data:
              print level*"  " + op,; nl = 0
***************
*** 119,123 ****
                      a.dump(level+1); nl = 1
                      i = i + 1
!             elif type(av) in (type(()), type([])):
                  for a in av:
                      if isinstance(a, SubPattern):
--- 120,124 ----
                      a.dump(level+1); nl = 1
                      i = i + 1
!             elif type(av) in seqtypes:
                  for a in av:
                      if isinstance(a, SubPattern):
***************
*** 150,153 ****
--- 151,156 ----
              return self.width
          lo = hi = 0L
+         UNITCODES = (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY)
+         REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
          for op, av in self.data:
              if op is BRANCH:
***************
*** 168,176 ****
                  lo = lo + i
                  hi = hi + j
!             elif op in (MIN_REPEAT, MAX_REPEAT):
                  i, j = av[2].getwidth()
                  lo = lo + long(i) * av[0]
                  hi = hi + long(j) * av[1]
!             elif op in (ANY, RANGE, IN, LITERAL, NOT_LITERAL, CATEGORY):
                  lo = lo + 1
                  hi = hi + 1
--- 171,179 ----
                  lo = lo + i
                  hi = hi + j
!             elif op in REPEATCODES:
                  i, j = av[2].getwidth()
                  lo = lo + long(i) * av[0]
                  hi = hi + long(j) * av[1]
!             elif op in UNITCODES:
                  lo = lo + 1
                  hi = hi + 1
***************
*** 314,324 ****
  
      items = []
      while 1:
!         items.append(_parse(source, state))
!         if source.match("|"):
              continue
          if not nested:
              break
!         if not source.next or source.match(")", 0):
              break
          else:
--- 317,329 ----
  
      items = []
+     itemsappend = items.append
+     sourcematch = source.match
      while 1:
!         itemsappend(_parse(source, state))
!         if sourcematch("|"):
              continue
          if not nested:
              break
!         if not source.next or sourcematch(")", 0):
              break
          else:
***************
*** 329,332 ****
--- 334,338 ----
  
      subpattern = SubPattern(state)
+     subpatternappend = subpattern.append
  
      # check if all items share a common prefix
***************
*** 345,349 ****
              for item in items:
                  del item[0]
!             subpattern.append(prefix)
              continue # check next one
          break
--- 351,355 ----
              for item in items:
                  del item[0]
!             subpatternappend(prefix)
              continue # check next one
          break
***************
*** 357,363 ****
          # branch (the compiler may optimize this even more)
          set = []
          for item in items:
!             set.append(item[0])
!         subpattern.append((IN, set))
          return subpattern
  
--- 363,370 ----
          # branch (the compiler may optimize this even more)
          set = []
+         setappend = set.append
          for item in items:
!             setappend(item[0])
!         subpatternappend((IN, set))
          return subpattern
  
***************
*** 381,392 ****
  def _parse(source, state):
      # parse a simple pattern
- 
      subpattern = SubPattern(state)
  
      while 1:
  
!         if source.next in ("|", ")"):
              break # end of subpattern
!         this = source.get()
          if this is None:
              break # end of pattern
--- 388,408 ----
  def _parse(source, state):
      # parse a simple pattern
      subpattern = SubPattern(state)
  
+     # precompute constants into local variables
+     subpatternappend = subpattern.append
+     sourceget = source.get
+     sourcematch = source.match
+     _len = len
+     PATTERNENDERS = ("|", ")")
+     ASSERTCHARS = ("=", "!", "<")
+     LOOKBEHINDASSERTCHARS = ("=", "!")
+     REPEATCODES = (MIN_REPEAT, MAX_REPEAT)
+ 
      while 1:
  
!         if source.next in PATTERNENDERS:
              break # end of subpattern
!         this = sourceget()
          if this is None:
              break # end of pattern
***************
*** 398,402 ****
              if this == "#":
                  while 1:
!                     this = source.get()
                      if this in (None, "\n"):
                          break
--- 414,418 ----
              if this == "#":
                  while 1:
!                     this = sourceget()
                      if this in (None, "\n"):
                          break
***************
*** 404,420 ****
  
          if this and this[0] not in SPECIAL_CHARS:
!             subpattern.append((LITERAL, ord(this)))
  
          elif this == "[":
              # character set
              set = []
! ##          if source.match(":"):
  ##              pass # handle character classes
!             if source.match("^"):
!                 set.append((NEGATE, None))
              # check remaining characters
              start = set[:]
              while 1:
!                 this = source.get()
                  if this == "]" and set != start:
                      break
--- 420,437 ----
  
          if this and this[0] not in SPECIAL_CHARS:
!             subpatternappend((LITERAL, ord(this)))
  
          elif this == "[":
              # character set
              set = []
!             setappend = set.append
! ##          if sourcematch(":"):
  ##              pass # handle character classes
!             if sourcematch("^"):
!                 setappend((NEGATE, None))
              # check remaining characters
              start = set[:]
              while 1:
!                 this = sourceget()
                  if this == "]" and set != start:
                      break
***************
*** 425,436 ****
                  else:
                      raise error, "unexpected end of regular expression"
!                 if source.match("-"):
                      # potential range
!                     this = source.get()
                      if this == "]":
                          if code1[0] is IN:
                              code1 = code1[1][0]
!                         set.append(code1)
!                         set.append((LITERAL, ord("-")))
                          break
                      elif this:
--- 442,453 ----
                  else:
                      raise error, "unexpected end of regular expression"
!                 if sourcematch("-"):
                      # potential range
!                     this = sourceget()
                      if this == "]":
                          if code1[0] is IN:
                              code1 = code1[1][0]
!                         setappend(code1)
!                         setappend((LITERAL, ord("-")))
                          break
                      elif this:
***************
*** 445,449 ****
                          if hi < lo:
                              raise error, "bad character range"
!                         set.append((RANGE, (lo, hi)))
                      else:
                          raise error, "unexpected end of regular expression"
--- 462,466 ----
                          if hi < lo:
                              raise error, "bad character range"
!                         setappend((RANGE, (lo, hi)))
                      else:
                          raise error, "unexpected end of regular expression"
***************
*** 451,464 ****
                      if code1[0] is IN:
                          code1 = code1[1][0]
!                     set.append(code1)
  
              # XXX: <fl> should move set optimization to compiler!
!             if len(set)==1 and set[0][0] is LITERAL:
!                 subpattern.append(set[0]) # optimization
!             elif len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
!                 subpattern.append((NOT_LITERAL, set[1][1])) # optimization
              else:
                  # XXX: <fl> should add charmap optimization here
!                 subpattern.append((IN, set))
  
          elif this and this[0] in REPEAT_CHARS:
--- 468,481 ----
                      if code1[0] is IN:
                          code1 = code1[1][0]
!                     setappend(code1)
  
              # XXX: <fl> should move set optimization to compiler!
!             if _len(set)==1 and set[0][0] is LITERAL:
!                 subpatternappend(set[0]) # optimization
!             elif _len(set)==2 and set[0][0] is NEGATE and set[1][0] is LITERAL:
!                 subpatternappend((NOT_LITERAL, set[1][1])) # optimization
              else:
                  # XXX: <fl> should add charmap optimization here
!                 subpatternappend((IN, set))
  
          elif this and this[0] in REPEAT_CHARS:
***************
*** 477,487 ****
                  while source.next in DIGITS:
                      lo = lo + source.get()
!                 if source.match(","):
                      while source.next in DIGITS:
!                         hi = hi + source.get()
                  else:
                      hi = lo
!                 if not source.match("}"):
!                     subpattern.append((LITERAL, ord(this)))
                      source.seek(here)
                      continue
--- 494,504 ----
                  while source.next in DIGITS:
                      lo = lo + source.get()
!                 if sourcematch(","):
                      while source.next in DIGITS:
!                         hi = hi + sourceget()
                  else:
                      hi = lo
!                 if not sourcematch("}"):
!                     subpatternappend((LITERAL, ord(this)))
                      source.seek(here)
                      continue
***************
*** 499,507 ****
              else:
                  item = None
!             if not item or (len(item) == 1 and item[0][0] == AT):
                  raise error, "nothing to repeat"
!             if item[0][0] in (MIN_REPEAT, MAX_REPEAT):
                  raise error, "multiple repeat"
!             if source.match("?"):
                  subpattern[-1] = (MIN_REPEAT, (min, max, item))
              else:
--- 516,524 ----
              else:
                  item = None
!             if not item or (_len(item) == 1 and item[0][0] == AT):
                  raise error, "nothing to repeat"
!             if item[0][0] in REPEATCODES:
                  raise error, "multiple repeat"
!             if sourcematch("?"):
                  subpattern[-1] = (MIN_REPEAT, (min, max, item))
              else:
***************
*** 509,513 ****
  
          elif this == ".":
!             subpattern.append((ANY, None))
  
          elif this == "(":
--- 526,530 ----
  
          elif this == ".":
!             subpatternappend((ANY, None))
  
          elif this == "(":
***************
*** 515,528 ****
              name = None
              condgroup = None
!             if source.match("?"):
                  group = 0
                  # options
!                 if source.match("P"):
                      # python extensions
!                     if source.match("<"):
                          # named group: skip forward to end of name
                          name = ""
                          while 1:
!                             char = source.get()
                              if char is None:
                                  raise error, "unterminated name"
--- 532,545 ----
              name = None
              condgroup = None
!             if sourcematch("?"):
                  group = 0
                  # options
!                 if sourcematch("P"):
                      # python extensions
!                     if sourcematch("<"):
                          # named group: skip forward to end of name
                          name = ""
                          while 1:
!                             char = sourceget()
                              if char is None:
                                  raise error, "unterminated name"
***************
*** 533,541 ****
                          if not isname(name):
                              raise error, "bad character in group name"
!                     elif source.match("="):
                          # named backreference
                          name = ""
                          while 1:
!                             char = source.get()
                              if char is None:
                                  raise error, "unterminated name"
--- 550,558 ----
                          if not isname(name):
                              raise error, "bad character in group name"
!                     elif sourcematch("="):
                          # named backreference
                          name = ""
                          while 1:
!                             char = sourceget()
                              if char is None:
                                  raise error, "unterminated name"
***************
*** 548,592 ****
                          if gid is None:
                              raise error, "unknown group name"
!                         subpattern.append((GROUPREF, gid))
                          continue
                      else:
!                         char = source.get()
                          if char is None:
                              raise error, "unexpected end of pattern"
                          raise error, "unknown specifier: ?P%s" % char
!                 elif source.match(":"):
                      # non-capturing group
                      group = 2
!                 elif source.match("#"):
                      # comment
                      while 1:
                          if source.next is None or source.next == ")":
                              break
!                         source.get()
!                     if not source.match(")"):
                          raise error, "unbalanced parenthesis"
                      continue
!                 elif source.next in ("=", "!", "<"):
                      # lookahead assertions
!                     char = source.get()
                      dir = 1
                      if char == "<":
!                         if source.next not in ("=", "!"):
                              raise error, "syntax error"
                          dir = -1 # lookbehind
!                         char = source.get()
                      p = _parse_sub(source, state)
!                     if not source.match(")"):
                          raise error, "unbalanced parenthesis"
                      if char == "=":
!                         subpattern.append((ASSERT, (dir, p)))
                      else:
!                         subpattern.append((ASSERT_NOT, (dir, p)))
                      continue
!                 elif source.match("("):
                      # conditional backreference group
                      condname = ""
                      while 1:
!                         char = source.get()
                          if char is None:
                              raise error, "unterminated name"
--- 565,609 ----
                          if gid is None:
                              raise error, "unknown group name"
!                         subpatternappend((GROUPREF, gid))
                          continue
                      else:
!                         char = sourceget()
                          if char is None:
                              raise error, "unexpected end of pattern"
                          raise error, "unknown specifier: ?P%s" % char
!                 elif sourcematch(":"):
                      # non-capturing group
                      group = 2
!                 elif sourcematch("#"):
                      # comment
                      while 1:
                          if source.next is None or source.next == ")":
                              break
!                         sourceget()
!                     if not sourcematch(")"):
                          raise error, "unbalanced parenthesis"
                      continue
!                 elif source.next in ASSERTCHARS:
                      # lookahead assertions
!                     char = sourceget()
                      dir = 1
                      if char == "<":
!                         if source.next not in LOOKBEHINDASSERTCHARS:
                              raise error, "syntax error"
                          dir = -1 # lookbehind
!                         char = sourceget()
                      p = _parse_sub(source, state)
!                     if not sourcematch(")"):
                          raise error, "unbalanced parenthesis"
                      if char == "=":
!                         subpatternappend((ASSERT, (dir, p)))
                      else:
!                         subpatternappend((ASSERT_NOT, (dir, p)))
                      continue
!                 elif sourcematch("("):
                      # conditional backreference group
                      condname = ""
                      while 1:
!                         char = sourceget()
                          if char is None:
                              raise error, "unterminated name"
***************
*** 609,613 ****
                          raise error, "unexpected end of pattern"
                      while source.next in FLAGS:
!                         state.flags = state.flags | FLAGS[source.get()]
              if group:
                  # parse group contents
--- 626,630 ----
                          raise error, "unexpected end of pattern"
                      while source.next in FLAGS:
!                         state.flags = state.flags | FLAGS[sourceget()]
              if group:
                  # parse group contents
***************
*** 621,632 ****
                  else:
                      p = _parse_sub(source, state)
!                 if not source.match(")"):
                      raise error, "unbalanced parenthesis"
                  if group is not None:
                      state.closegroup(group)
!                 subpattern.append((SUBPATTERN, (group, p)))
              else:
                  while 1:
!                     char = source.get()
                      if char is None:
                          raise error, "unexpected end of pattern"
--- 638,649 ----
                  else:
                      p = _parse_sub(source, state)
!                 if not sourcematch(")"):
                      raise error, "unbalanced parenthesis"
                  if group is not None:
                      state.closegroup(group)
!                 subpatternappend((SUBPATTERN, (group, p)))
              else:
                  while 1:
!                     char = sourceget()
                      if char is None:
                          raise error, "unexpected end of pattern"
***************
*** 636,640 ****
  
          elif this == "^":
!             subpattern.append((AT, AT_BEGINNING))
  
          elif this == "$":
--- 653,657 ----
  
          elif this == "^":
!             subpatternappend((AT, AT_BEGINNING))
  
          elif this == "$":
***************
*** 643,647 ****
          elif this and this[0] == "\\":
              code = _escape(source, this, state)
!             subpattern.append(code)
  
          else:
--- 660,664 ----
          elif this and this[0] == "\\":
              code = _escape(source, this, state)
!             subpatternappend(code)
  
          else:
***************
*** 682,692 ****
      # group references
      s = Tokenizer(source)
      p = []
      a = p.append
!     def literal(literal, p=p):
          if p and p[-1][0] is LITERAL:
              p[-1] = LITERAL, p[-1][1] + literal
          else:
!             p.append((LITERAL, literal))
      sep = source[:0]
      if type(sep) is type(""):
--- 699,710 ----
      # group references
      s = Tokenizer(source)
+     sget = s.get
      p = []
      a = p.append
!     def literal(literal, p=p, pappend=a):
          if p and p[-1][0] is LITERAL:
              p[-1] = LITERAL, p[-1][1] + literal
          else:
!             pappend((LITERAL, literal))
      sep = source[:0]
      if type(sep) is type(""):
***************
*** 695,699 ****
          makechar = unichr
      while 1:
!         this = s.get()
          if this is None:
              break # end of replacement string
--- 713,717 ----
          makechar = unichr
      while 1:
!         this = sget()
          if this is None:
              break # end of replacement string
***************
*** 704,708 ****
                  if s.match("<"):
                      while 1:
!                         char = s.get()
                          if char is None:
                              raise error, "unterminated group name"
--- 722,726 ----
                  if s.match("<"):
                      while 1:
!                         char = sget()
                          if char is None:
                              raise error, "unterminated group name"
***************
*** 732,736 ****
                              break
                      elif s.next in OCTDIGITS:
!                         this = this + s.get()
                      else:
                          break
--- 750,754 ----
                              break
                      elif s.next in OCTDIGITS:
!                         this = this + sget()
                      else:
                          break
***************
*** 753,763 ****
      i = 0
      groups = []
!     literals = []
      for c, s in p:
          if c is MARK:
!             groups.append((i, s))
!             literals.append(None)
          else:
!             literals.append(s)
          i = i + 1
      return groups, literals
--- 771,782 ----
      i = 0
      groups = []
!     groupsappend = groups.append
!     literals = [None] * len(p)
      for c, s in p:
          if c is MARK:
!             groupsappend((i, s))
!             # literals[i] is already None
          else:
!             literals[i] = s
          i = i + 1
      return groups, literals




More information about the Python-checkins mailing list