[Python-checkins] cpython (merge 3.3 -> default): Issue #19327: Fixed matching of regular expressions with very large character sets.

serhiy.storchaka python-checkins at python.org
Thu Oct 24 21:05:12 CEST 2013


http://hg.python.org/cpython/rev/10081a0ca4bd
changeset:   86601:10081a0ca4bd
parent:      86598:b9623fa5a0dd
parent:      86600:4431fa917f22
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Thu Oct 24 22:04:37 2013 +0300
summary:
  Issue #19327: Fixed matching of regular expressions with very large character sets.

files:
  Lib/sre_compile.py  |  2 +-
  Lib/test/test_re.py |  3 +++
  Misc/NEWS           |  2 ++
  Modules/_sre.c      |  4 ++--
  4 files changed, 8 insertions(+), 3 deletions(-)


diff --git a/Lib/sre_compile.py b/Lib/sre_compile.py
--- a/Lib/sre_compile.py
+++ b/Lib/sre_compile.py
@@ -339,7 +339,7 @@
     else:
         code = 'I'
     # Convert block indices to byte array of 256 bytes
-    mapping = array.array('b', mapping).tobytes()
+    mapping = array.array('B', mapping).tobytes()
     # Convert byte array to word array
     mapping = array.array(code, mapping)
     assert mapping.itemsize == _sre.CODESIZE
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -482,6 +482,9 @@
                                   "\u2222").group(1), "\u2222")
         self.assertEqual(re.match("([\u2222\u2223])",
                                   "\u2222", re.UNICODE).group(1), "\u2222")
+        r = '[%s]' % ''.join(map(chr, range(256, 2**16, 255)))
+        self.assertEqual(re.match(r,
+                                  "\uff01", re.UNICODE).group(), "\uff01")
 
     def test_big_codesize(self):
         # Issue #1160
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -19,6 +19,8 @@
 Library
 -------
 
+- Issue #19327: Fixed the working of regular expressions with too big charset.
+
 - Issue #17400: New 'is_global' attribute for ipaddress to tell if an address
   is allocated by IANA for global or private networks.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -447,7 +447,7 @@
             count = *(set++);
 
             if (sizeof(SRE_CODE) == 2) {
-                block = ((char*)set)[ch >> 8];
+                block = ((unsigned char*)set)[ch >> 8];
                 set += 128;
                 if (set[block*16 + ((ch & 255)>>4)] & (1 << (ch & 15)))
                     return ok;
@@ -457,7 +457,7 @@
                 /* !(c & ~N) == (c < N+1) for any unsigned c, this avoids
                  * warnings when c's type supports only numbers < N+1 */
                 if (!(ch & ~65535))
-                    block = ((char*)set)[ch >> 8];
+                    block = ((unsigned char*)set)[ch >> 8];
                 else
                     block = -1;
                 set += 64;

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list