[Python-checkins] CVS: python/dist/src/Lib sre.py,1.20,1.21 sre_compile.py,1.25,1.26 sre_constants.py,1.17,1.18 sre_parse.py,1.25,1.26
Fredrik Lundh
python-dev@python.org
Sun, 23 Jul 2000 14:46:21 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv6140/Lib
Modified Files:
sre.py sre_compile.py sre_constants.py sre_parse.py
Log Message:
-- SRE 0.9.6 sync. this includes:
+ added "regs" attribute
+ fixed "pos" and "endpos" attributes
+ reset "lastindex" and "lastgroup" in scanner methods
+ removed (?P#id) syntax; the "lastindex" and "lastgroup"
  attributes are now always set
+ removed string module dependencies in sre_parse
+ better debugging support in sre_parse
+ various tweaks to build under 1.5.2
Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.20
retrieving revision 1.21
diff -C2 -r1.20 -r1.21
*** sre.py 2000/07/02 22:59:57 1.20
--- sre.py 2000/07/23 21:46:17 1.21
***************
*** 11,17 ****
--- 11,21 ----
#
+ # FIXME: change all FIXME's to XXX ;-)
+
import sre_compile
import sre_parse
+ import string
+
# flags
I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
***************
*** 54,57 ****
--- 58,64 ----
return _compile(pattern, flags)
+ def purge():
+ _cache.clear()
+
def template(pattern, flags=0):
return _compile(pattern, flags|T)
***************
*** 66,70 ****
else:
s[i] = "\\" + c
! return pattern[:0].join(s)
# --------------------------------------------------------------------
--- 73,77 ----
else:
s[i] = "\\" + c
! return _join(s, pattern)
# --------------------------------------------------------------------
***************
*** 74,81 ****
_MAXCACHE = 100
def _compile(pattern, flags=0):
# internal: compile pattern
tp = type(pattern)
! if tp not in (type(""), type(u"")):
return pattern
key = (tp, pattern, flags)
--- 81,92 ----
_MAXCACHE = 100
+ def _join(seq, sep):
+ # internal: join into string having the same type as sep
+ return string.join(seq, sep[:0])
+
def _compile(pattern, flags=0):
# internal: compile pattern
tp = type(pattern)
! if tp not in sre_compile.STRING_TYPES:
return pattern
key = (tp, pattern, flags)
***************
*** 90,97 ****
return p
- def purge():
- # clear pattern cache
- _cache.clear()
-
def _sub(pattern, template, string, count=0):
# internal: pattern.sub implementation hook
--- 101,104 ----
***************
*** 121,125 ****
n = n + 1
append(string[i:])
! return string[:0].join(s), n
def _split(pattern, string, maxsplit=0):
--- 128,132 ----
n = n + 1
append(string[i:])
! return _join(s, string[:0]), n
def _split(pattern, string, maxsplit=0):
***************
*** 162,170 ****
class Scanner:
def __init__(self, lexicon):
self.lexicon = lexicon
p = []
for phrase, action in lexicon:
! p.append("(?:%s)(?P#%d)" % (phrase, len(p)))
! self.scanner = _compile("|".join(p))
def scan(self, string):
result = []
--- 169,185 ----
class Scanner:
def __init__(self, lexicon):
+ from sre_constants import BRANCH, SUBPATTERN, INDEX
self.lexicon = lexicon
+ # combine phrases into a compound pattern
p = []
+ s = sre_parse.Pattern()
for phrase, action in lexicon:
! p.append(sre_parse.SubPattern(s, [
! (SUBPATTERN, (None, sre_parse.parse(phrase))),
! (INDEX, len(p))
! ]))
! p = sre_parse.SubPattern(s, [(BRANCH, (None, p))])
! s.groups = len(p)
! self.scanner = sre_compile.compile(p)
def scan(self, string):
result = []
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -r1.25 -r1.26
*** sre_compile.py 2000/07/05 21:14:15 1.25
--- sre_compile.py 2000/07/23 21:46:17 1.26
***************
*** 198,205 ****
emit(ATCODES[av])
elif op is BRANCH:
- emit(OPCODES[op])
tail = []
for av in av[1]:
skip = len(code); emit(0)
_compile(code, av, flags)
emit(OPCODES[JUMP])
--- 198,206 ----
emit(ATCODES[av])
elif op is BRANCH:
tail = []
for av in av[1]:
+ emit(OPCODES[op])
skip = len(code); emit(0)
+ emit(MAXCODE) # save mark
_compile(code, av, flags)
emit(OPCODES[JUMP])
***************
*** 287,295 ****
code[skip] = len(code) - skip
def compile(p, flags=0):
# internal: convert pattern list to internal format
# compile, as necessary
! if type(p) in (type(""), type(u"")):
import sre_parse
pattern = p
--- 288,303 ----
code[skip] = len(code) - skip
+ STRING_TYPES = [type("")]
+
+ try:
+ STRING_TYPES.append(type(unicode("")))
+ except NameError:
+ pass
+
def compile(p, flags=0):
# internal: convert pattern list to internal format
# compile, as necessary
! if type(p) in STRING_TYPES:
import sre_parse
pattern = p
***************
*** 308,311 ****
--- 316,321 ----
code.append(OPCODES[SUCCESS])
+
+ # print code
# FIXME: <fl> get rid of this limitation!
Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.17
retrieving revision 1.18
diff -C2 -r1.17 -r1.18
*** sre_constants.py 2000/07/16 12:04:30 1.17
--- sre_constants.py 2000/07/23 21:46:17 1.18
***************
*** 173,177 ****
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
! SRE_FLAG_LOCALE = 4 # honor system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
--- 173,177 ----
SRE_FLAG_TEMPLATE = 1 # template mode (disable backtracking)
SRE_FLAG_IGNORECASE = 2 # case insensitive
! SRE_FLAG_LOCALE = 4 # honour system locale
SRE_FLAG_MULTILINE = 8 # treat target as multiline string
SRE_FLAG_DOTALL = 16 # treat target as a single string
Index: sre_parse.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_parse.py,v
retrieving revision 1.25
retrieving revision 1.26
diff -C2 -r1.25 -r1.26
*** sre_parse.py 2000/07/03 21:31:48 1.25
--- sre_parse.py 2000/07/23 21:46:17 1.26
***************
*** 26,35 ****
REPEAT_CHARS = "*+?{"
! DIGITS = tuple(string.digits)
OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")
! WHITESPACE = tuple(string.whitespace)
ESCAPES = {
--- 26,35 ----
REPEAT_CHARS = "*+?{"
! DIGITS = tuple("012345689")
OCTDIGITS = tuple("01234567")
HEXDIGITS = tuple("0123456789abcdefABCDEF")
! WHITESPACE = tuple(" \t\n\r\v\f")
ESCAPES = {
***************
*** 69,73 ****
}
! class State:
def __init__(self):
self.flags = 0
--- 69,74 ----
}
! class Pattern:
! # master pattern object. keeps track of global attributes
def __init__(self):
self.flags = 0
***************
*** 89,92 ****
--- 90,120 ----
self.data = data
self.width = None
+ def dump(self, level=0):
+ nl = 1
+ for op, av in self.data:
+ print level*" " + op,; nl = 0
+ if op == "in":
+ # member sublanguage
+ print; nl = 1
+ for op, a in av:
+ print (level+1)*" " + op, a
+ elif op == "branch":
+ print; nl = 1
+ i = 0
+ for a in av[1]:
+ if i > 0:
+ print level*" " + "or"
+ a.dump(level+1); nl = 1
+ i = i + 1
+ elif type(av) in (type(()), type([])):
+ for a in av:
+ if isinstance(a, SubPattern):
+ if not nl: print
+ a.dump(level+1); nl = 1
+ else:
+ print a, ; nl = 0
+ else:
+ print av, ; nl = 0
+ if not nl: print
def __repr__(self):
return repr(self.data)
***************
*** 256,264 ****
raise error, "bogus escape: %s" % repr(escape)
! def _branch(pattern, items):
! # form a branch operator from a set of items
! subpattern = SubPattern(pattern)
# check if all items share a common prefix
while 1:
--- 284,307 ----
raise error, "bogus escape: %s" % repr(escape)
! def _parse_sub(source, state, nested=1):
! # parse an alternation: a|b|c
! items = []
! while 1:
! items.append(_parse(source, state))
! if source.match("|"):
! continue
! if not nested:
! break
! if not source.next or source.match(")"):
! break
! else:
! raise error, "pattern not properly closed"
+ if len(items) == 1:
+ return items[0]
+
+ subpattern = SubPattern(state)
+
# check if all items share a common prefix
while 1:
***************
*** 286,290 ****
else:
# we can store this as a character set instead of a
! # branch (FIXME: use a range if possible)
set = []
for item in items:
--- 329,333 ----
else:
# we can store this as a character set instead of a
! # branch (the compiler may optimize this even more)
set = []
for item in items:
***************
*** 297,302 ****
def _parse(source, state):
!
! # parse regular expression pattern into an operator list.
subpattern = SubPattern(state)
--- 340,344 ----
def _parse(source, state):
! # parse a simple pattern
subpattern = SubPattern(state)
***************
*** 452,471 ****
raise error, "unknown group name"
subpattern.append((GROUPREF, gid))
- elif source.match("#"):
- index = ""
- while 1:
- char = source.get()
- if char is None:
- raise error, "unterminated index"
- if char == ")":
- break
- index = index + char
- try:
- index = int(index)
- if index < 0 or index > MAXREPEAT:
- raise ValueError
- except ValueError:
- raise error, "illegal index"
- subpattern.append((INDEX, index))
continue
else:
--- 494,497 ----
***************
*** 492,511 ****
dir = -1 # lookbehind
char = source.get()
! b = []
! while 1:
! p = _parse(source, state)
! if source.next == ")":
! if b:
! b.append(p)
! p = _branch(state, b)
! if char == "=":
! subpattern.append((ASSERT, (dir, p)))
! else:
! subpattern.append((ASSERT_NOT, (dir, p)))
! break
! elif source.match("|"):
! b.append(p)
! else:
! raise error, "pattern not properly closed"
else:
# flags
--- 518,527 ----
dir = -1 # lookbehind
char = source.get()
! p = _parse_sub(source, state)
! if char == "=":
! subpattern.append((ASSERT, (dir, p)))
! else:
! subpattern.append((ASSERT_NOT, (dir, p)))
! continue
else:
# flags
***************
*** 514,518 ****
if group:
# parse group contents
- b = []
if group == 2:
# anonymous group
--- 530,533 ----
***************
*** 520,537 ****
else:
group = state.getgroup(name)
! while 1:
! p = _parse(source, state)
! if group is not None:
! p.append((INDEX, group))
! if source.match(")"):
! if b:
! b.append(p)
! p = _branch(state, b)
! subpattern.append((SUBPATTERN, (group, p)))
! break
! elif source.match("|"):
! b.append(p)
! else:
! raise error, "group not properly closed"
else:
while 1:
--- 535,542 ----
else:
group = state.getgroup(name)
! p = _parse_sub(source, state)
! subpattern.append((SUBPATTERN, (group, p)))
! if group is not None:
! p.append((INDEX, group))
else:
while 1:
***************
*** 556,579 ****
return subpattern
! def parse(pattern, flags=0):
# parse 're' pattern into list of (opcode, argument) tuples
! source = Tokenizer(pattern)
! state = State()
! state.flags = flags
! b = []
! while 1:
! p = _parse(source, state)
! tail = source.get()
! if tail == "|":
! b.append(p)
! elif tail == ")":
! raise error, "unbalanced parenthesis"
! elif tail is None:
! if b:
! b.append(p)
! p = _branch(state, b)
! break
! else:
! raise error, "bogus characters at end of regular expression"
return p
--- 561,582 ----
return subpattern
! def parse(str, flags=0):
# parse 're' pattern into list of (opcode, argument) tuples
!
! source = Tokenizer(str)
!
! pattern = Pattern()
! pattern.flags = flags
!
! p = _parse_sub(source, pattern, 0)
!
! tail = source.get()
! if tail == ")":
! raise error, "unbalanced parenthesis"
! elif tail:
! raise error, "bogus characters at end of regular expression"
!
! # p.dump()
!
return p
***************
*** 657,659 ****
raise error, "empty group"
a(s)
! return sep.join(p)
--- 660,662 ----
raise error, "empty group"
a(s)
! return string.join(p, sep)