[Python-checkins] CVS: python/dist/src/Lib sre.py,1.4,1.5 sre_compile.py,1.3,1.4 sre_constants.py,1.3,1.4
Jeremy Hylton
python-dev@python.org
Thu, 1 Jun 2000 10:39:14 -0700
Update of /cvsroot/python/python/dist/src/Lib
In directory slayer.i.sourceforge.net:/tmp/cvs-serv26344
Modified Files:
sre.py sre_compile.py sre_constants.py
Log Message:
Fredrik Lundh: here's the 96.6% version of SRE
Index: sre.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre.py,v
retrieving revision 1.4
retrieving revision 1.5
diff -C2 -r1.4 -r1.5
*** sre.py 2000/05/02 15:52:33 1.4
--- sre.py 2000/06/01 17:39:12 1.5
***************
*** 1,6 ****
- # -*- Mode: Python; tab-width: 4 -*-
#
# Secret Labs' Regular Expression Engine
! # $Id: sre.py,v 1.4 2000/05/02 15:52:33 guido Exp $
#
# re-compatible interface for the sre matching engine
--- 1,5 ----
#
# Secret Labs' Regular Expression Engine
! # $Id: sre.py,v 1.5 2000/06/01 17:39:12 jhylton Exp $
#
# re-compatible interface for the sre matching engine
***************
*** 8,14 ****
# Copyright (c) 1998-2000 by Secret Labs AB. All rights reserved.
#
- # This code can only be used for 1.6 alpha testing. All other use
- # require explicit permission from Secret Labs AB.
- #
# Portions of this engine have been developed in cooperation with
# CNRI. Hewlett-Packard provided funding for 1.6 integration and
--- 7,10 ----
***************
*** 16,45 ****
#
- """
- this is a long string
- """
-
import sre_compile
# --------------------------------------------------------------------
# public interface
! def compile(pattern, flags=0):
! return sre_compile.compile(pattern, _fixflags(flags))
def match(pattern, string, flags=0):
! return compile(pattern, _fixflags(flags)).match(string)
def search(pattern, string, flags=0):
! return compile(pattern, _fixflags(flags)).search(string)
! # FIXME: etc
# --------------------------------------------------------------------
! # helpers
! def _fixflags(flags):
! # convert flag bitmask to sequence
! assert not flags
! return ()
--- 12,132 ----
#
import sre_compile
+ # flags
+ I = IGNORECASE = sre_compile.SRE_FLAG_IGNORECASE
+ L = LOCALE = sre_compile.SRE_FLAG_LOCALE
+ M = MULTILINE = sre_compile.SRE_FLAG_MULTILINE
+ S = DOTALL = sre_compile.SRE_FLAG_DOTALL
+ X = VERBOSE = sre_compile.SRE_FLAG_VERBOSE
+
# --------------------------------------------------------------------
# public interface
! # FIXME: add docstrings
def match(pattern, string, flags=0):
! return _compile(pattern, flags).match(string)
def search(pattern, string, flags=0):
! return _compile(pattern, flags).search(string)
!
! def sub(pattern, repl, string, count=0):
! return _compile(pattern).sub(repl, string, count)
!
! def subn(pattern, repl, string, count=0):
! return _compile(pattern).subn(repl, string, count)
!
! def split(pattern, string, maxsplit=0):
! return _compile(pattern).split(string, maxsplit)
!
! def findall(pattern, string, maxsplit=0):
! return _compile(pattern).findall(string, maxsplit)
!
! def compile(pattern, flags=0):
! return _compile(pattern, flags)
! def escape(pattern):
! s = list(pattern)
! for i in range(len(pattern)):
! c = pattern[i]
! if not ("a" <= c <= "z" or "A" <= c <= "Z" or "0" <= c <= "9"):
! if c == "\000":
! s[i] = "\\000"
! else:
! s[i] = "\\" + c
! return pattern[:0].join(s)
# --------------------------------------------------------------------
! # internals
! _cache = {}
! _MAXCACHE = 100
+ def _compile(pattern, flags=0):
+ # internal: compile pattern
+ tp = type(pattern)
+ if tp not in (type(""), type(u"")):
+ return pattern
+ key = (tp, pattern, flags)
+ try:
+ return _cache[key]
+ except KeyError:
+ pass
+ p = sre_compile.compile(pattern, flags)
+ if len(_cache) >= _MAXCACHE:
+ _cache.clear()
+ _cache[key] = p
+ return p
+
+ def _sub(pattern, template, string, count=0):
+ # internal: pattern.sub implementation hook
+ return _subn(pattern, template, string, count)[0]
+
+ def _expand(match, template):
+ # internal: expand template
+ return template # FIXME
+
+ def _subn(pattern, template, string, count=0):
+ # internal: pattern.subn implementation hook
+ if callable(template):
+ filter = callable
+ else:
+ # FIXME: prepare template
+ def filter(match, template=template):
+ return _expand(match, template)
+ n = i = 0
+ s = []
+ append = s.append
+ c = pattern.cursor(string)
+ while not count or n < count:
+ m = c.search()
+ if not m:
+ break
+ j = m.start()
+ if j > i:
+ append(string[i:j])
+ append(filter(m))
+ i = m.end()
+ n = n + 1
+ if i < len(string):
+ append(string[i:])
+ return string[:0].join(s), n
+
+ def _split(pattern, string, maxsplit=0):
+ # internal: pattern.split implementation hook
+ n = i = 0
+ s = []
+ append = s.append
+ c = pattern.cursor(string)
+ while not maxsplit or n < maxsplit:
+ m = c.search()
+ if not m:
+ break
+ j = m.start()
+ append(string[i:j])
+ i = m.end()
+ n = n + 1
+ if i < len(string):
+ append(string[i:])
+ return s
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** sre_compile.py 2000/04/10 17:10:48 1.3
--- sre_compile.py 2000/06/01 17:39:12 1.4
***************
*** 1,5 ****
#
# Secret Labs' Regular Expression Engine
! # $Id: sre_compile.py,v 1.3 2000/04/10 17:10:48 guido Exp $
#
# convert template to internal format
--- 1,5 ----
#
# Secret Labs' Regular Expression Engine
! # $Id: sre_compile.py,v 1.4 2000/06/01 17:39:12 jhylton Exp $
#
# convert template to internal format
***************
*** 15,21 ****
#
- # FIXME: <fl> formalize (objectify?) and document the compiler code
- # format, so that other frontends can use this compiler
-
import array, string, sys
--- 15,18 ----
***************
*** 46,76 ****
def todata(self):
# print self.data
! return array.array(WORDSIZE, self.data).tostring()
!
! def _lower(literal):
! # return _sre._lower(literal) # FIXME
! return string.lower(literal)
! def _compile(code, pattern, flags):
append = code.append
for op, av in pattern:
if op is ANY:
! if "s" in flags:
! append(CODES[op]) # any character at all!
else:
! append(CODES[NOT_LITERAL])
! append(10)
elif op in (SUCCESS, FAILURE):
! append(CODES[op])
elif op is AT:
! append(CODES[op])
! append(POSITIONS[av])
elif op is BRANCH:
! append(CODES[op])
tail = []
for av in av[1]:
skip = len(code); append(0)
! _compile(code, av, flags)
! append(CODES[JUMP])
tail.append(len(code)); append(0)
code[skip] = len(code) - skip
--- 43,76 ----
def todata(self):
# print self.data
! try:
! return array.array(WORDSIZE, self.data).tostring()
! except OverflowError:
! print self.data
! raise
! def _compile(code, pattern, flags, level=0):
append = code.append
for op, av in pattern:
if op is ANY:
! if flags & SRE_FLAG_DOTALL:
! append(OPCODES[op]) # any character at all!
else:
! append(OPCODES[CATEGORY])
! append(CHCODES[CATEGORY_NOT_LINEBREAK])
elif op in (SUCCESS, FAILURE):
! append(OPCODES[op])
elif op is AT:
! append(OPCODES[op])
! if flags & SRE_FLAG_MULTILINE:
! append(ATCODES[AT_MULTILINE[av]])
! else:
! append(ATCODES[av])
elif op is BRANCH:
! append(OPCODES[op])
tail = []
for av in av[1]:
skip = len(code); append(0)
! _compile(code, av, flags, level)
! append(OPCODES[JUMP])
tail.append(len(code)); append(0)
code[skip] = len(code) - skip
***************
*** 79,107 ****
code[tail] = len(code) - tail
elif op is CALL:
! append(CODES[op])
skip = len(code); append(0)
! _compile(code, av, flags)
! append(CODES[SUCCESS])
code[skip] = len(code) - skip
elif op is CATEGORY: # not used by current parser
! append(CODES[op])
! append(CATEGORIES[av])
elif op is GROUP:
! if "i" in flags:
! append(CODES[MAP_IGNORE[op]])
else:
! append(CODES[op])
! append(av)
elif op is IN:
! if "i" in flags:
! append(CODES[MAP_IGNORE[op]])
def fixup(literal):
! return ord(_lower(literal))
else:
! append(CODES[op])
fixup = ord
skip = len(code); append(0)
for op, av in av:
! append(CODES[op])
if op is NEGATE:
pass
--- 79,110 ----
code[tail] = len(code) - tail
elif op is CALL:
! append(OPCODES[op])
skip = len(code); append(0)
! _compile(code, av, flags, level+1)
! append(OPCODES[SUCCESS])
code[skip] = len(code) - skip
elif op is CATEGORY: # not used by current parser
! append(OPCODES[op])
! if flags & SRE_FLAG_LOCALE:
! append(CH_LOCALE[CHCODES[av]])
! else:
! append(CHCODES[av])
elif op is GROUP:
! if flags & SRE_FLAG_IGNORECASE:
! append(OPCODES[OP_IGNORE[op]])
else:
! append(OPCODES[op])
! append(av-1)
elif op is IN:
! if flags & SRE_FLAG_IGNORECASE:
! append(OPCODES[OP_IGNORE[op]])
def fixup(literal):
! return ord(literal.lower())
else:
! append(OPCODES[op])
fixup = ord
skip = len(code); append(0)
for op, av in av:
! append(OPCODES[op])
if op is NEGATE:
pass
***************
*** 112,129 ****
append(fixup(av[1]))
elif op is CATEGORY:
! append(CATEGORIES[av])
else:
raise ValueError, "unsupported set operator"
! append(CODES[FAILURE])
code[skip] = len(code) - skip
elif op in (LITERAL, NOT_LITERAL):
! if "i" in flags:
! append(CODES[MAP_IGNORE[op]])
! append(ord(_lower(av)))
else:
! append(CODES[op])
append(ord(av))
elif op is MARK:
! append(CODES[op])
append(av)
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
--- 115,135 ----
append(fixup(av[1]))
elif op is CATEGORY:
! if flags & SRE_FLAG_LOCALE:
! append(CH_LOCALE[CHCODES[av]])
! else:
! append(CHCODES[av])
else:
raise ValueError, "unsupported set operator"
! append(OPCODES[FAILURE])
code[skip] = len(code) - skip
elif op in (LITERAL, NOT_LITERAL):
! if flags & SRE_FLAG_IGNORECASE:
! append(OPCODES[OP_IGNORE[op]])
! append(ord(av.lower()))
else:
! append(OPCODES[op])
append(ord(av))
elif op is MARK:
! append(OPCODES[op])
append(av)
elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
***************
*** 132,167 ****
raise SyntaxError, "cannot repeat zero-width items"
if lo == hi == 1 and op is MAX_REPEAT:
! append(CODES[MAX_REPEAT_ONE])
skip = len(code); append(0)
append(av[0])
append(av[1])
! _compile(code, av[2], flags)
! append(CODES[SUCCESS])
code[skip] = len(code) - skip
else:
! append(CODES[op])
skip = len(code); append(0)
append(av[0])
append(av[1])
! _compile(code, av[2], flags)
if op is MIN_REPEAT:
! append(CODES[MIN_UNTIL])
else:
! # FIXME: MAX_REPEAT PROBABLY DOESN'T WORK (?)
! append(CODES[MAX_UNTIL])
code[skip] = len(code) - skip
elif op is SUBPATTERN:
! ## group = av[0]
! ## if group:
! ## append(CODES[MARK])
! ## append((group-1)*2)
! _compile(code, av[1], flags)
! ## if group:
! ## append(CODES[MARK])
! ## append((group-1)*2+1)
else:
raise ValueError, ("unsupported operand type", op)
! def compile(p, flags=()):
# convert pattern list to internal format
if type(p) in (type(""), type(u"")):
--- 138,172 ----
raise SyntaxError, "cannot repeat zero-width items"
if lo == hi == 1 and op is MAX_REPEAT:
! append(OPCODES[MAX_REPEAT_ONE])
skip = len(code); append(0)
append(av[0])
append(av[1])
! _compile(code, av[2], flags, level+1)
! append(OPCODES[SUCCESS])
code[skip] = len(code) - skip
else:
! append(OPCODES[op])
skip = len(code); append(0)
append(av[0])
append(av[1])
! _compile(code, av[2], flags, level+1)
if op is MIN_REPEAT:
! append(OPCODES[MIN_UNTIL])
else:
! append(OPCODES[MAX_UNTIL])
code[skip] = len(code) - skip
elif op is SUBPATTERN:
! group = av[0]
! if group:
! append(OPCODES[MARK])
! append((group-1)*2)
! _compile(code, av[1], flags, level+1)
! if group:
! append(OPCODES[MARK])
! append((group-1)*2+1)
else:
raise ValueError, ("unsupported operand type", op)
! def compile(p, flags=0):
# convert pattern list to internal format
if type(p) in (type(""), type(u"")):
***************
*** 171,180 ****
else:
pattern = None
! # print p.getwidth()
! # print p
code = Code()
! _compile(code, p.data, p.pattern.flags)
! code.append(CODES[SUCCESS])
! # print list(code.data)
data = code.todata()
if 0: # debugging
--- 176,183 ----
else:
pattern = None
! flags = p.pattern.flags | flags
code = Code()
! _compile(code, p.data, flags)
! code.append(OPCODES[SUCCESS])
data = code.todata()
if 0: # debugging
***************
*** 184,187 ****
sre_disasm.disasm(data)
print "-" * 68
! # print len(data), p.pattern.groups, len(p.pattern.groupdict)
! return _sre.compile(pattern, data, p.pattern.groups-1, p.pattern.groupdict)
--- 187,193 ----
sre_disasm.disasm(data)
print "-" * 68
! return _sre.compile(
! pattern, flags,
! data,
! p.pattern.groups-1, p.pattern.groupdict
! )
Index: sre_constants.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_constants.py,v
retrieving revision 1.3
retrieving revision 1.4
diff -C2 -r1.3 -r1.4
*** sre_constants.py 2000/04/10 17:10:48 1.3
--- sre_constants.py 2000/06/01 17:39:12 1.4
***************
*** 1,5 ****
#
# Secret Labs' Regular Expression Engine
! # $Id: sre_constants.py,v 1.3 2000/04/10 17:10:48 guido Exp $
#
# various symbols used by the regular expression engine.
--- 1,5 ----
#
# Secret Labs' Regular Expression Engine
! # $Id: sre_constants.py,v 1.4 2000/06/01 17:39:12 jhylton Exp $
#
# various symbols used by the regular expression engine.
***************
*** 49,58 ****
# positions
AT_BEGINNING = "at_beginning"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
# categories
-
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
--- 49,59 ----
# positions
AT_BEGINNING = "at_beginning"
+ AT_BEGINNING_LINE = "at_beginning_line"
AT_BOUNDARY = "at_boundary"
AT_NON_BOUNDARY = "at_non_boundary"
AT_END = "at_end"
+ AT_END_LINE = "at_end_line"
# categories
CATEGORY_DIGIT = "category_digit"
CATEGORY_NOT_DIGIT = "category_not_digit"
***************
*** 61,66 ****
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
! CODES = [
# failure=0 success=1 (just because it looks better that way :-)
--- 62,77 ----
CATEGORY_WORD = "category_word"
CATEGORY_NOT_WORD = "category_not_word"
+ CATEGORY_LINEBREAK = "category_linebreak"
+ CATEGORY_NOT_LINEBREAK = "category_not_linebreak"
+ CATEGORY_LOC_DIGIT = "category_loc_digit"
+ CATEGORY_LOC_NOT_DIGIT = "category_loc_not_digit"
+ CATEGORY_LOC_SPACE = "category_loc_space"
+ CATEGORY_LOC_NOT_SPACE = "category_loc_not_space"
+ CATEGORY_LOC_WORD = "category_loc_word"
+ CATEGORY_LOC_NOT_WORD = "category_loc_not_word"
+ CATEGORY_LOC_LINEBREAK = "category_loc_linebreak"
+ CATEGORY_LOC_NOT_LINEBREAK = "category_loc_not_linebreak"
! OPCODES = [
# failure=0 success=1 (just because it looks better that way :-)
***************
*** 87,101 ****
]
! # convert to dictionary
! c = {}
! i = 0
! for code in CODES:
! c[code] = i
! i = i + 1
! CODES = c
# replacement operations for "ignore case" mode
! MAP_IGNORE = {
GROUP: GROUP_IGNORE,
IN: IN_IGNORE,
--- 98,130 ----
]
+
+ ATCODES = [
+ AT_BEGINNING, AT_BEGINNING_LINE, AT_BOUNDARY,
+ AT_NON_BOUNDARY, AT_END, AT_END_LINE
+ ]
! CHCODES = [
! CATEGORY_DIGIT, CATEGORY_NOT_DIGIT, CATEGORY_SPACE,
! CATEGORY_NOT_SPACE, CATEGORY_WORD, CATEGORY_NOT_WORD,
! CATEGORY_LINEBREAK, CATEGORY_NOT_LINEBREAK, CATEGORY_LOC_DIGIT,
! CATEGORY_LOC_NOT_DIGIT, CATEGORY_LOC_SPACE,
! CATEGORY_LOC_NOT_SPACE, CATEGORY_LOC_WORD, CATEGORY_LOC_NOT_WORD,
! CATEGORY_LOC_LINEBREAK, CATEGORY_LOC_NOT_LINEBREAK
! ]
!
! def makedict(list):
! d = {}
! i = 0
! for item in list:
! d[item] = i
! i = i + 1
! return d
!
! OPCODES = makedict(OPCODES)
! ATCODES = makedict(ATCODES)
! CHCODES = makedict(CHCODES)
# replacement operations for "ignore case" mode
! OP_IGNORE = {
GROUP: GROUP_IGNORE,
IN: IN_IGNORE,
***************
*** 104,131 ****
}
! POSITIONS = {
! AT_BEGINNING: ord("a"),
! AT_BOUNDARY: ord("b"),
! AT_NON_BOUNDARY: ord("B"),
! AT_END: ord("z"),
}
! CATEGORIES = {
! CATEGORY_DIGIT: ord("d"),
! CATEGORY_NOT_DIGIT: ord("D"),
! CATEGORY_SPACE: ord("s"),
! CATEGORY_NOT_SPACE: ord("S"),
! CATEGORY_WORD: ord("w"),
! CATEGORY_NOT_WORD: ord("W"),
}
if __name__ == "__main__":
import string
! items = CODES.items()
! items.sort(lambda a, b: cmp(a[1], b[1]))
f = open("sre_constants.h", "w")
! f.write("/* generated by sre_constants.py */\n")
! for k, v in items:
! f.write("#define SRE_OP_" + string.upper(k) + " " + str(v) + "\n")
f.close()
print "done"
--- 133,172 ----
}
! AT_MULTILINE = {
! AT_BEGINNING: AT_BEGINNING_LINE,
! AT_END: AT_END_LINE
}
! CH_LOCALE = {
! CATEGORY_DIGIT: CATEGORY_LOC_DIGIT,
! CATEGORY_NOT_DIGIT: CATEGORY_LOC_NOT_DIGIT,
! CATEGORY_SPACE: CATEGORY_LOC_SPACE,
! CATEGORY_NOT_SPACE: CATEGORY_LOC_NOT_SPACE,
! CATEGORY_WORD: CATEGORY_LOC_WORD,
! CATEGORY_NOT_WORD: CATEGORY_LOC_NOT_WORD,
! CATEGORY_LINEBREAK: CATEGORY_LOC_LINEBREAK,
! CATEGORY_NOT_LINEBREAK: CATEGORY_LOC_NOT_LINEBREAK
}
+ # flags
+ SRE_FLAG_TEMPLATE = 1 # NYI
+ SRE_FLAG_IGNORECASE = 2
+ SRE_FLAG_LOCALE = 4
+ SRE_FLAG_MULTILINE = 8
+ SRE_FLAG_DOTALL = 16
+ SRE_FLAG_VERBOSE = 32
+
if __name__ == "__main__":
import string
! def dump(f, d, prefix):
! items = d.items()
! items.sort(lambda a, b: cmp(a[1], b[1]))
! for k, v in items:
! f.write("#define %s_%s %s\n" % (prefix, string.upper(k), v))
f = open("sre_constants.h", "w")
! f.write("/* generated from sre_constants.py */\n")
! dump(f, OPCODES, "SRE_OP")
! dump(f, ATCODES, "SRE")
! dump(f, CHCODES, "SRE")
f.close()
print "done"