[Python-checkins] cpython (3.2): Issue #1160: Fix compiling large regular expressions on UCS2 builds.

antoine.pitrou python-checkins at python.org
Tue Nov 20 22:38:11 CET 2012


http://hg.python.org/cpython/rev/a3579d766fb6
changeset:   80532:a3579d766fb6
branch:      3.2
parent:      80515:d2054aa9bed4
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Tue Nov 20 22:30:42 2012 +0100
summary:
  Issue #1160: Fix compiling large regular expressions on UCS2 builds.
Patch by Serhiy Storchaka.

files:
  Lib/test/test_re.py |   6 ++++++
  Misc/NEWS           |   3 +++
  Modules/_sre.c      |  11 +++++++----
  Modules/sre.h       |   6 +-----
  4 files changed, 17 insertions(+), 9 deletions(-)


diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -419,6 +419,12 @@
         self.assertEqual(re.match("([\u2222\u2223])",
                                   "\u2222", re.UNICODE).group(1), "\u2222")
 
+    def test_big_codesize(self):
+        # Issue #1160
+        r = re.compile('|'.join(('%d'%x for x in range(10000))))
+        self.assertIsNotNone(r.match('1000'))
+        self.assertIsNotNone(r.match('9999'))
+
     def test_anyall(self):
         self.assertEqual(re.match("a.b", "a\nb", re.DOTALL).group(0),
                          "a\nb")
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -167,6 +167,9 @@
 Library
 -------
 
+- Issue #1160: Fix compiling large regular expressions on UCS2 builds.
+  Patch by Serhiy Storchaka.
+
 - Issue #14313: zipfile now raises NotImplementedError when the compression
   type is unknown.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -2695,6 +2695,13 @@
     for (i = 0; i < n; i++) {
         PyObject *o = PyList_GET_ITEM(code, i);
         unsigned long value = PyLong_AsUnsignedLong(o);
+        if (value == (unsigned long)-1 && PyErr_Occurred()) {
+            if (PyErr_ExceptionMatches(PyExc_OverflowError)) {
+                PyErr_SetString(PyExc_OverflowError,
+                                "regular expression code size limit exceeded");
+            }
+            break;
+        }
         self->code[i] = (SRE_CODE) value;
         if ((unsigned long) self->code[i] != value) {
             PyErr_SetString(PyExc_OverflowError,
@@ -3065,10 +3072,8 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
                 code += skip-4;
@@ -3086,10 +3091,8 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-#ifdef Py_UNICODE_WIDE
                 if (max > 65535)
                     FAIL;
-#endif
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
                 code += skip-3;
diff --git a/Modules/sre.h b/Modules/sre.h
--- a/Modules/sre.h
+++ b/Modules/sre.h
@@ -14,12 +14,8 @@
 #include "sre_constants.h"
 
 /* size of a code word (must be unsigned short or larger, and
-   large enough to hold a Py_UNICODE character) */
-#ifdef Py_UNICODE_WIDE
+   large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
-#else
-#define SRE_CODE unsigned short
-#endif
 
 typedef struct {
     PyObject_VAR_HEAD

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list