[Python-checkins] python/dist/src/Lib sre_compile.py,1.51,1.52
rhettinger at users.sourceforge.net
rhettinger at users.sourceforge.net
Fri Mar 26 06:17:12 EST 2004
Update of /cvsroot/python/python/dist/src/Lib
In directory sc8-pr-cvs1.sourceforge.net:/tmp/cvs-serv32139
Modified Files:
sre_compile.py
Log Message:
Simple optimizations:
* pre-build a single identity function for the fixup function
* pre-build membership tests in dictionaries instead of in-line tuples
* assign len() to a local variable
* assign append() methods to a local variable
* use xrange() instead of range()
* replace "x<<1" with "x+x"
Index: sre_compile.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/sre_compile.py,v
retrieving revision 1.51
retrieving revision 1.52
diff -C2 -d -r1.51 -r1.52
*** sre_compile.py 25 Mar 2004 13:50:59 -0000 1.51
--- sre_compile.py 26 Mar 2004 11:16:55 -0000 1.52
***************
*** 22,30 ****
MAXCODE = 0xFFFFFFFFL
def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
emit = code.append
for op, av in pattern:
! if op in (LITERAL, NOT_LITERAL):
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
--- 22,44 ----
MAXCODE = 0xFFFFFFFFL
+ def _identityfunction(x):
+ return x
+
+ # use xrange if available
+ try:
+ xrange
+ except NameError:
+ xrange = range
+
def _compile(code, pattern, flags):
# internal: compile a (sub)pattern
emit = code.append
+ _len = len
+ LITERAL_CODES = {LITERAL:1, NOT_LITERAL:1}
+ REPEATING_CODES = {REPEAT:1, MIN_REPEAT:1, MAX_REPEAT:1}
+ SUCCESS_CODES = {SUCCESS:1, FAILURE:1}
+ ASSERT_CODES = {ASSERT:1, ASSERT_NOT:1}
for op, av in pattern:
! if op in LITERAL_CODES:
if flags & SRE_FLAG_IGNORECASE:
emit(OPCODES[OP_IGNORE[op]])
***************
*** 40,47 ****
else:
emit(OPCODES[op])
! fixup = lambda x: x
! skip = len(code); emit(0)
_compile_charset(av, flags, code, fixup)
! code[skip] = len(code) - skip
elif op is ANY:
if flags & SRE_FLAG_DOTALL:
--- 54,61 ----
else:
emit(OPCODES[op])
! fixup = _identityfunction
! skip = _len(code); emit(0)
_compile_charset(av, flags, code, fixup)
! code[skip] = _len(code) - skip
elif op is ANY:
if flags & SRE_FLAG_DOTALL:
***************
*** 49,81 ****
else:
emit(OPCODES[ANY])
! elif op in (REPEAT, MIN_REPEAT, MAX_REPEAT):
if flags & SRE_FLAG_TEMPLATE:
raise error, "internal: unsupported template operator"
emit(OPCODES[REPEAT])
! skip = len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
! elif _simple(av) and op != REPEAT:
! if op == MAX_REPEAT:
emit(OPCODES[REPEAT_ONE])
else:
emit(OPCODES[MIN_REPEAT_ONE])
! skip = len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
else:
emit(OPCODES[REPEAT])
! skip = len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
! code[skip] = len(code) - skip
! if op == MAX_REPEAT:
emit(OPCODES[MAX_UNTIL])
else:
--- 63,95 ----
else:
emit(OPCODES[ANY])
! elif op in REPEATING_CODES:
if flags & SRE_FLAG_TEMPLATE:
raise error, "internal: unsupported template operator"
emit(OPCODES[REPEAT])
! skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
! code[skip] = _len(code) - skip
! elif _simple(av) and op is not REPEAT:
! if op is MAX_REPEAT:
emit(OPCODES[REPEAT_ONE])
else:
emit(OPCODES[MIN_REPEAT_ONE])
! skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
emit(OPCODES[SUCCESS])
! code[skip] = _len(code) - skip
else:
emit(OPCODES[REPEAT])
! skip = _len(code); emit(0)
emit(av[0])
emit(av[1])
_compile(code, av[2], flags)
! code[skip] = _len(code) - skip
! if op is MAX_REPEAT:
emit(OPCODES[MAX_UNTIL])
else:
***************
*** 90,98 ****
emit(OPCODES[MARK])
emit((av[0]-1)*2+1)
! elif op in (SUCCESS, FAILURE):
emit(OPCODES[op])
! elif op in (ASSERT, ASSERT_NOT):
emit(OPCODES[op])
! skip = len(code); emit(0)
if av[0] >= 0:
emit(0) # look ahead
--- 104,112 ----
emit(OPCODES[MARK])
emit((av[0]-1)*2+1)
! elif op in SUCCESS_CODES:
emit(OPCODES[op])
! elif op in ASSERT_CODES:
emit(OPCODES[op])
! skip = _len(code); emit(0)
if av[0] >= 0:
emit(0) # look ahead
***************
*** 104,114 ****
_compile(code, av[1], flags)
emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
elif op is CALL:
emit(OPCODES[op])
! skip = len(code); emit(0)
_compile(code, av, flags)
emit(OPCODES[SUCCESS])
! code[skip] = len(code) - skip
elif op is AT:
emit(OPCODES[op])
--- 118,128 ----
_compile(code, av[1], flags)
emit(OPCODES[SUCCESS])
! code[skip] = _len(code) - skip
elif op is CALL:
emit(OPCODES[op])
! skip = _len(code); emit(0)
_compile(code, av, flags)
emit(OPCODES[SUCCESS])
! code[skip] = _len(code) - skip
elif op is AT:
emit(OPCODES[op])
***************
*** 123,136 ****
emit(OPCODES[op])
tail = []
for av in av[1]:
! skip = len(code); emit(0)
# _compile_info(code, av, flags)
_compile(code, av, flags)
emit(OPCODES[JUMP])
! tail.append(len(code)); emit(0)
! code[skip] = len(code) - skip
emit(0) # end of branch
for tail in tail:
! code[tail] = len(code) - tail
elif op is CATEGORY:
emit(OPCODES[op])
--- 137,151 ----
emit(OPCODES[op])
tail = []
+ tailappend = tail.append
for av in av[1]:
! skip = _len(code); emit(0)
# _compile_info(code, av, flags)
_compile(code, av, flags)
emit(OPCODES[JUMP])
! tailappend(_len(code)); emit(0)
! code[skip] = _len(code) - skip
emit(0) # end of branch
for tail in tail:
! code[tail] = _len(code) - tail
elif op is CATEGORY:
emit(OPCODES[op])
***************
*** 149,162 ****
emit(OPCODES[op])
emit((av[0]-1)*2)
! skipyes = len(code); emit(0)
_compile(code, av[1], flags)
if av[2]:
emit(OPCODES[JUMP])
! skipno = len(code); emit(0)
! code[skipyes] = len(code) - skipyes + 1
_compile(code, av[2], flags)
! code[skipno] = len(code) - skipno
else:
! code[skipyes] = len(code) - skipyes + 1
else:
raise ValueError, ("unsupported operand type", op)
--- 164,177 ----
emit(OPCODES[op])
emit((av[0]-1)*2)
! skipyes = _len(code); emit(0)
_compile(code, av[1], flags)
if av[2]:
emit(OPCODES[JUMP])
! skipno = _len(code); emit(0)
! code[skipyes] = _len(code) - skipyes + 1
_compile(code, av[2], flags)
! code[skipno] = _len(code) - skipno
else:
! code[skipyes] = _len(code) - skipyes + 1
else:
raise ValueError, ("unsupported operand type", op)
***************
*** 166,170 ****
emit = code.append
if fixup is None:
! fixup = lambda x: x
for op, av in _optimize_charset(charset, fixup):
emit(OPCODES[op])
--- 181,185 ----
emit = code.append
if fixup is None:
! fixup = _identityfunction
for op, av in _optimize_charset(charset, fixup):
emit(OPCODES[op])
***************
*** 194,202 ****
# internal: optimize character set
out = []
charmap = [False]*256
try:
for op, av in charset:
if op is NEGATE:
! out.append((op, av))
elif op is LITERAL:
charmap[fixup(av)] = True
--- 209,218 ----
# internal: optimize character set
out = []
+ outappend = out.append
charmap = [False]*256
try:
for op, av in charset:
if op is NEGATE:
! outappend((op, av))
elif op is LITERAL:
charmap[fixup(av)] = True
***************
*** 213,216 ****
--- 229,233 ----
i = p = n = 0
runs = []
+ runsappend = runs.append
for c in charmap:
if c:
***************
*** 219,234 ****
n = n + 1
elif n:
! runs.append((p, n))
n = 0
i = i + 1
if n:
! runs.append((p, n))
if len(runs) <= 2:
# use literal/range
for p, n in runs:
if n == 1:
! out.append((LITERAL, p))
else:
! out.append((RANGE, (p, p+n-1)))
if len(out) < len(charset):
return out
--- 236,251 ----
n = n + 1
elif n:
! runsappend((p, n))
n = 0
i = i + 1
if n:
! runsappend((p, n))
if len(runs) <= 2:
# use literal/range
for p, n in runs:
if n == 1:
! outappend((LITERAL, p))
else:
! outappend((RANGE, (p, p+n-1)))
if len(out) < len(charset):
return out
***************
*** 236,240 ****
# use bitmap
data = _mk_bitmap(charmap)
! out.append((CHARSET, data))
return out
return charset
--- 253,257 ----
# use bitmap
data = _mk_bitmap(charmap)
! outappend((CHARSET, data))
return out
return charset
***************
*** 242,245 ****
--- 259,263 ----
def _mk_bitmap(bits):
data = []
+ dataappend = data.append
if _sre.CODESIZE == 2:
start = (1, 0)
***************
*** 250,256 ****
if c:
v = v + m
! m = m << 1
if m > MAXCODE:
! data.append(v)
m, v = start
return data
--- 268,274 ----
if c:
v = v + m
! m = m + m
if m > MAXCODE:
! dataappend(v)
m, v = start
return data
***************
*** 296,300 ****
charmap[fixup(av)] = True
elif op is RANGE:
! for i in range(fixup(av[0]), fixup(av[1])+1):
charmap[i] = True
elif op is CATEGORY:
--- 314,318 ----
charmap[fixup(av)] = True
elif op is RANGE:
! for i in xrange(fixup(av[0]), fixup(av[1])+1):
charmap[i] = True
elif op is CATEGORY:
***************
*** 308,312 ****
# XXX: negation does not work with big charsets
return charset
! for i in range(65536):
charmap[i] = not charmap[i]
comps = {}
--- 326,330 ----
# XXX: negation does not work with big charsets
return charset
! for i in xrange(65536):
charmap[i] = not charmap[i]
comps = {}
***************
*** 314,318 ****
block = 0
data = []
! for i in range(256):
chunk = tuple(charmap[i*256:(i+1)*256])
new = comps.setdefault(chunk, block)
--- 332,336 ----
block = 0
data = []
! for i in xrange(256):
chunk = tuple(charmap[i*256:(i+1)*256])
new = comps.setdefault(chunk, block)
***************
*** 349,354 ****
--- 367,374 ----
# look for a literal prefix
prefix = []
+ prefixappend = prefix.append
prefix_skip = 0
charset = [] # not used
+ charsetappend = charset.append
if not (flags & SRE_FLAG_IGNORECASE):
# look for literal prefix
***************
*** 357,365 ****
if len(prefix) == prefix_skip:
prefix_skip = prefix_skip + 1
! prefix.append(av)
elif op is SUBPATTERN and len(av[1]) == 1:
op, av = av[1][0]
if op is LITERAL:
! prefix.append(av)
else:
break
--- 377,385 ----
if len(prefix) == prefix_skip:
prefix_skip = prefix_skip + 1
! prefixappend(av)
elif op is SUBPATTERN and len(av[1]) == 1:
op, av = av[1][0]
if op is LITERAL:
! prefixappend(av)
else:
break
***************
*** 372,378 ****
op, av = av[1][0]
if op is LITERAL:
! charset.append((op, av))
elif op is BRANCH:
c = []
for p in av[1]:
if not p:
--- 392,399 ----
op, av = av[1][0]
if op is LITERAL:
! charsetappend((op, av))
elif op is BRANCH:
c = []
+ cappend = c.append
for p in av[1]:
if not p:
***************
*** 380,384 ****
op, av = p[0]
if op is LITERAL:
! c.append((op, av))
else:
break
--- 401,405 ----
op, av = p[0]
if op is LITERAL:
! cappend((op, av))
else:
break
***************
*** 387,390 ****
--- 408,412 ----
elif op is BRANCH:
c = []
+ cappend = c.append
for p in av[1]:
if not p:
***************
*** 392,396 ****
op, av = p[0]
if op is LITERAL:
! c.append((op, av))
else:
break
--- 414,418 ----
op, av = p[0]
if op is LITERAL:
! cappend((op, av))
else:
break
***************
*** 433,437 ****
# generate overlap table
table = [-1] + ([0]*len(prefix))
! for i in range(len(prefix)):
table[i+1] = table[i]+1
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
--- 455,459 ----
# generate overlap table
table = [-1] + ([0]*len(prefix))
! for i in xrange(len(prefix)):
table[i+1] = table[i]+1
while table[i+1] > 0 and prefix[i] != prefix[table[i+1]-1]:
More information about the Python-checkins
mailing list