[Python-checkins] cpython: Minor code clean up and improvements in the re module.

serhiy.storchaka python-checkins at python.org
Tue Nov 11 20:16:13 CET 2014


https://hg.python.org/cpython/rev/30a6c74ad87f
changeset:   93476:30a6c74ad87f
parent:      93474:d25c58a420d6
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Tue Nov 11 21:13:28 2014 +0200
summary:
  Minor code clean up and improvements in the re module.

files:
  Lib/re.py           |   2 +-
  Lib/sre_compile.py  |  16 ++++++----------
  Lib/sre_parse.py    |   8 ++++----
  Lib/test/test_re.py |   4 ++--
  4 files changed, 13 insertions(+), 17 deletions(-)


diff --git a/Lib/re.py b/Lib/re.py
--- a/Lib/re.py
+++ b/Lib/re.py
@@ -363,7 +363,7 @@
         append = result.append
         match = self.scanner.scanner(string).match
         i = 0
-        while 1:
+        while True:
             m = match()
             if not m:
                 break
diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -16,11 +16,6 @@
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
-if _sre.CODESIZE == 2:
-    MAXCODE = 65535
-else:
-    MAXCODE = 0xFFFFFFFF
-
 _LITERAL_CODES = {LITERAL, NOT_LITERAL}
 _REPEATING_CODES = {REPEAT, MIN_REPEAT, MAX_REPEAT}
 _SUCCESS_CODES = {SUCCESS, FAILURE}
@@ -191,7 +186,7 @@
                 emit(JUMP)
                 tailappend(_len(code)); emit(0)
                 code[skip] = _len(code) - skip
-            emit(0) # end of branch
+            emit(FAILURE) # end of branch
             for tail in tail:
                 code[tail] = _len(code) - tail
         elif op is CATEGORY:
@@ -374,6 +369,7 @@
     return out
 
 _CODEBITS = _sre.CODESIZE * 8
+MAXCODE = (1 << _CODEBITS) - 1
 _BITS_TRANS = b'0' + b'1' * 255
 def _mk_bitmap(bits, _CODEBITS=_CODEBITS, _int=int):
     s = bits.translate(_BITS_TRANS)[::-1]
@@ -477,9 +473,9 @@
             elif op is IN:
                 charset = av
 ##     if prefix:
-##         print "*** PREFIX", prefix, prefix_skip
+##         print("*** PREFIX", prefix, prefix_skip)
 ##     if charset:
-##         print "*** CHARSET", charset
+##         print("*** CHARSET", charset)
     # add an info block
     emit = code.append
     emit(INFO)
@@ -489,9 +485,9 @@
     if prefix:
         mask = SRE_INFO_PREFIX
         if len(prefix) == prefix_skip == len(pattern.data):
-            mask = mask + SRE_INFO_LITERAL
+            mask = mask | SRE_INFO_LITERAL
     elif charset:
-        mask = mask + SRE_INFO_CHARSET
+        mask = mask | SRE_INFO_CHARSET
     emit(mask)
     # pattern length
     if lo < MAXCODE:
diff --git a/Lib/sre_parse.py b/Lib/sre_parse.py
--- a/Lib/sre_parse.py
+++ b/Lib/sre_parse.py
@@ -103,18 +103,18 @@
         seqtypes = (tuple, list)
         for op, av in self.data:
             print(level*"  " + str(op), end='')
-            if op == IN:
+            if op is IN:
                 # member sublanguage
                 print()
                 for op, a in av:
                     print((level+1)*"  " + str(op), a)
-            elif op == BRANCH:
+            elif op is BRANCH:
                 print()
                 for i, a in enumerate(av[1]):
                     if i:
                         print(level*"  " + "OR")
                     a.dump(level+1)
-            elif op == GROUPREF_EXISTS:
+            elif op is GROUPREF_EXISTS:
                 condgroup, item_yes, item_no = av
                 print('', condgroup)
                 item_yes.dump(level+1)
@@ -607,7 +607,7 @@
                 item = subpattern[-1:]
             else:
                 item = None
-            if not item or (_len(item) == 1 and item[0][0] == AT):
+            if not item or (_len(item) == 1 and item[0][0] is AT):
                 raise source.error("nothing to repeat",
                                    source.tell() - here + len(this))
             if item[0][0] in _REPEATCODES:
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1101,8 +1101,8 @@
 
     def test_inline_flags(self):
         # Bug #1700
-        upper_char = chr(0x1ea0) # Latin Capital Letter A with Dot Bellow
-        lower_char = chr(0x1ea1) # Latin Small Letter A with Dot Bellow
+        upper_char = '\u1ea0' # Latin Capital Letter A with Dot Below
+        lower_char = '\u1ea1' # Latin Small Letter A with Dot Below
 
         p = re.compile(upper_char, re.I | re.U)
         q = p.match(lower_char)

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list