[Python-checkins] cpython: Minor code clean up and improvements in the re module.
serhiy.storchaka
python-checkins at python.org
Tue Nov 11 20:16:13 CET 2014
https://hg.python.org/cpython/rev/30a6c74ad87f
changeset: 93476:30a6c74ad87f
parent: 93474:d25c58a420d6
user: Serhiy Storchaka <storchaka at gmail.com>
date: Tue Nov 11 21:13:28 2014 +0200
summary:
Minor code clean up and improvements in the re module.
files:
Lib/re.py | 2 +-
Lib/sre_compile.py | 16 ++++++----------
Lib/sre_parse.py | 8 ++++----
Lib/test/test_re.py | 4 ++--
4 files changed, 13 insertions(+), 17 deletions(-)
diff --git a/Lib/re.py b/Lib/re.py
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -363,7 +363,7 @@
append = result.append
match = self.scanner.scanner(string).match
i = 0
- while 1:
+ while True:
m = match()
if not m:
break
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -16,11 +16,6 @@
assert _sre.MAGIC == MAGIC, "SRE module mismatch"
-if _sre.CODESIZE == 2:
- MAXCODE = 65535
-else:
- MAXCODE = 0xFFFFFFFF
-
_LITERAL_CODES = {LITERAL, NOT_LITERAL}
_REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
_SUCCESS_CODES = {SUCCESS, FAILURE}
@@ -191,7 +186,7 @@
emit(JUMP)
tailappend(_len(code)); emit(0)
code[skip] = _len(code) - skip
- emit(0) # end of branch
+ emit(FAILURE) # end of branch
for tail in tail:
code[tail] = _len(code) - tail
elif op is CATEGORY:
@@ -374,6 +369,7 @@
return out
_CODEBITS = _sre.CODESIZE * 8
+MAXCODE = (1 << _CODEBITS) - 1
_BITS_TRANS = b'0' + b'1' * 255
def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
s = bits.translate(_BITS_TRANS)[::-1]
@@ -477,9 +473,9 @@
elif op is IN:
charset = av
## if prefix:
-## print "*** PREFIX", prefix, prefix_skip
+## print("*** PREFIX", prefix, prefix_skip)
## if charset:
-## print "*** CHARSET", charset
+## print("*** CHARSET", charset)
# add an info block
emit = code.append
emit(INFO)
@@ -489,9 +485,9 @@
if prefix:
mask = SRE_INFO_PREFIX
if len(prefix) == prefix_skip == len(pattern.data):
- mask = mask + SRE_INFO_LITERAL
+ mask = mask | SRE_INFO_LITERAL
elif charset:
- mask = mask + SRE_INFO_CHARSET
+ mask = mask | SRE_INFO_CHARSET
emit(mask)
# pattern length
if lo < MAXCODE:
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -103,18 +103,18 @@
seqtypes = (tuple, list)
for op, av in self.data:
print(level*" " + str(op), end='')
- if op == IN:
+ if op is IN:
# member sublanguage
print()
for op, a in av:
print((level+1)*" " + str(op), a)
- elif op == BRANCH:
+ elif op is BRANCH:
print()
for i, a in enumerate(av[1]):
if i:
print(level*" " + "OR")
a.dump(level+1)
- elif op == GROUPREF_EXISTS:
+ elif op is GROUPREF_EXISTS:
condgroup, item_yes, item_no = av
print('', condgroup)
item_yes.dump(level+1)
@@ -607,7 +607,7 @@
item = subpattern[-1:]
else:
item = None
- if not item or (_len(item) == 1 and item[0][0] == AT):
+ if not item or (_len(item) == 1 and item[0][0] is AT):
raise source.error("nothing to repeat",
source.tell() - here + len(this))
if item[0][0] in _REPEATCODES:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1101,8 +1101,8 @@
def test_inline_flags(self):
# Bug #1700
- upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
- lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
+ upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
+ lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
p = re.compile(upper_char, re.I | re.U)
q = p.match(lower_char)
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins mailing list