[issue13169] Regular expressions with 0 to 65536 repetitions raises OverflowError

Serhiy Storchaka report at bugs.python.org
Thu Jan 31 16:23:27 CET 2013


Serhiy Storchaka added the comment:

Here are patches for 2.7 and 3.2, and an updated patch for 3.3+
(test_repeat_minmax_overflow_maxrepeat is changed).

----------
Added file: http://bugs.python.org/file28919/re_maxrepeat4-2.7.patch
Added file: http://bugs.python.org/file28920/re_maxrepeat4-3.2.patch
Added file: http://bugs.python.org/file28921/re_maxrepeat4.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue13169>
_______________________________________
-------------- next part --------------
diff -r df9f8feb7444 Lib/sre_compile.py
--- a/Lib/sre_compile.py	Thu Jan 31 16:10:15 2013 +0200
+++ b/Lib/sre_compile.py	Thu Jan 31 17:18:46 2013 +0200
@@ -13,6 +13,7 @@
 import _sre, sys
 import sre_parse
 from sre_constants import *
+from _sre import MAXREPEAT
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
diff -r df9f8feb7444 Lib/sre_constants.py
--- a/Lib/sre_constants.py	Thu Jan 31 16:10:15 2013 +0200
+++ b/Lib/sre_constants.py	Thu Jan 31 17:18:46 2013 +0200
@@ -15,10 +15,6 @@
 
 MAGIC = 20031017
 
-# max code word in this release
-
-MAXREPEAT = 65535
-
 # SRE standard exception (access as sre.error)
 # should this really be here?
 
diff -r df9f8feb7444 Lib/sre_parse.py
--- a/Lib/sre_parse.py	Thu Jan 31 16:10:15 2013 +0200
+++ b/Lib/sre_parse.py	Thu Jan 31 17:18:46 2013 +0200
@@ -15,6 +15,7 @@
 import sys
 
 from sre_constants import *
+from _sre import MAXREPEAT
 
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS = "*+?{"
@@ -498,10 +499,18 @@
                     continue
                 if lo:
                     min = int(lo)
+                    if MAXREPEAT <= min <= sys.maxsize:
+                        raise error("the repetition number is too large")
                 if hi:
                     max = int(hi)
-                if max < min:
-                    raise error, "bad repeat interval"
+                    if max < min:
+                        raise error("bad repeat interval")
+                    if max >= MAXREPEAT:
+                        if max <= sys.maxsize:
+                            raise error("the repetition number is too large")
+                        max = MAXREPEAT
+                if min > MAXREPEAT:
+                    min = MAXREPEAT
             else:
                 raise error, "not supported"
             # figure out which item to repeat
diff -r df9f8feb7444 Lib/test/test_re.py
--- a/Lib/test/test_re.py	Thu Jan 31 16:10:15 2013 +0200
+++ b/Lib/test/test_re.py	Thu Jan 31 17:18:46 2013 +0200
@@ -1,5 +1,5 @@
 from test.test_support import verbose, run_unittest, import_module
-from test.test_support import precisionbigmemtest, _2G
+from test.test_support import precisionbigmemtest, _2G, cpython_only
 import re
 from re import Scanner
 import sys
@@ -847,6 +847,41 @@
         self.assertEqual(n, size + 1)
 
 
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertIsNone(re.match(r".{%d}" % 2**128, string))
+        self.assertEqual(re.match(r".{,%d}" % 2**128, string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % 2**128, string))
+        self.assertRaises(re.error, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        if MAXREPEAT > sys.maxsize:
+            self.skipTest('requires _sre.MAXREPEAT <= sys.maxsize')
+        # Locals of test_repeat_minmax_overflow are not visible here.
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(re.error, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff -r df9f8feb7444 Modules/_sre.c
--- a/Modules/_sre.c	Thu Jan 31 16:10:15 2013 +0200
+++ b/Modules/_sre.c	Thu Jan 31 17:18:46 2013 +0200
@@ -524,7 +524,7 @@
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
         end = ptr + maxcount;
 
     switch (pattern[0]) {
@@ -1139,7 +1139,7 @@
             } else {
                 /* general case */
                 LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
+                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
                     state->ptr = ctx->ptr;
                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1225,7 +1225,7 @@
             }
 
             if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
+                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
                 state->ptr != ctx->u.rep->last_ptr) {
                 /* we may have enough matches, but if we can
                    match another item, do so */
@@ -1303,7 +1303,7 @@
             LASTMARK_RESTORE();
 
             if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
+                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
                 RETURN_FAILURE;
 
             ctx->u.rep->count = ctx->count;
@@ -3042,7 +3042,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
@@ -3061,7 +3061,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
@@ -3938,6 +3938,12 @@
         Py_DECREF(x);
     }
 
+    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+    if (x) {
+        PyDict_SetItemString(d, "MAXREPEAT", x);
+        Py_DECREF(x);
+    }
+
     x = PyString_FromString(copyright);
     if (x) {
         PyDict_SetItemString(d, "copyright", x);
diff -r df9f8feb7444 Modules/sre.h
--- a/Modules/sre.h	Thu Jan 31 16:10:15 2013 +0200
+++ b/Modules/sre.h	Thu Jan 31 17:18:46 2013 +0200
@@ -16,9 +16,19 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a UCS4 character) */
 #ifdef Py_USING_UNICODE
-#define SRE_CODE Py_UCS4
+# define SRE_CODE Py_UCS4
+# if SIZEOF_SIZE_T > 4
+#  define SRE_MAXREPEAT (~(SRE_CODE)0)
+# else
+#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+# endif
 #else
-#define SRE_CODE unsigned long
+# define SRE_CODE unsigned long
+# if SIZEOF_SIZE_T > SIZEOF_LONG
+#  define SRE_MAXREPEAT (~(SRE_CODE)0)
+# else
+#  define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+# endif
 #endif
 
 typedef struct {
-------------- next part --------------
diff -r 9c0cd608464e Lib/sre_compile.py
--- a/Lib/sre_compile.py	Thu Jan 31 16:11:04 2013 +0200
+++ b/Lib/sre_compile.py	Thu Jan 31 17:14:59 2013 +0200
@@ -13,6 +13,7 @@
 import _sre, sys
 import sre_parse
 from sre_constants import *
+from _sre import MAXREPEAT
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
diff -r 9c0cd608464e Lib/sre_constants.py
--- a/Lib/sre_constants.py	Thu Jan 31 16:11:04 2013 +0200
+++ b/Lib/sre_constants.py	Thu Jan 31 17:14:59 2013 +0200
@@ -15,10 +15,6 @@
 
 MAGIC = 20031017
 
-# max code word in this release
-
-MAXREPEAT = 65535
-
 # SRE standard exception (access as sre.error)
 # should this really be here?
 
diff -r 9c0cd608464e Lib/sre_parse.py
--- a/Lib/sre_parse.py	Thu Jan 31 16:11:04 2013 +0200
+++ b/Lib/sre_parse.py	Thu Jan 31 17:14:59 2013 +0200
@@ -15,6 +15,7 @@
 import sys
 
 from sre_constants import *
+from _sre import MAXREPEAT
 
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS = "*+?{"
@@ -505,10 +506,18 @@
                     continue
                 if lo:
                     min = int(lo)
+                    if MAXREPEAT <= min <= sys.maxsize:
+                        raise error("the repetition number is too large")
                 if hi:
                     max = int(hi)
-                if max < min:
-                    raise error("bad repeat interval")
+                    if max < min:
+                        raise error("bad repeat interval")
+                    if max >= MAXREPEAT:
+                        if max <= sys.maxsize:
+                            raise error("the repetition number is too large")
+                        max = MAXREPEAT
+                if min > MAXREPEAT:
+                    min = MAXREPEAT
             else:
                 raise error("not supported")
             # figure out which item to repeat
diff -r 9c0cd608464e Lib/test/test_re.py
--- a/Lib/test/test_re.py	Thu Jan 31 16:11:04 2013 +0200
+++ b/Lib/test/test_re.py	Thu Jan 31 17:14:59 2013 +0200
@@ -1,4 +1,5 @@
-from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
+from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
+        cpython_only
 import io
 import re
 from re import Scanner
@@ -883,6 +884,41 @@
         self.assertEqual(n, size + 1)
 
 
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertIsNone(re.match(r".{%d}" % 2**128, string))
+        self.assertEqual(re.match(r".{,%d}" % 2**128, string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % 2**128, string))
+        self.assertRaises(re.error, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        if MAXREPEAT > sys.maxsize:
+            self.skipTest('requires _sre.MAXREPEAT <= sys.maxsize')
+        # Locals of test_repeat_minmax_overflow are not visible here.
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(re.error, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff -r 9c0cd608464e Modules/_sre.c
--- a/Modules/_sre.c	Thu Jan 31 16:11:04 2013 +0200
+++ b/Modules/_sre.c	Thu Jan 31 17:14:59 2013 +0200
@@ -517,7 +517,7 @@
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < end - ptr && maxcount != 65535)
+    if (maxcount < end - ptr && maxcount != SRE_MAXREPEAT)
         end = ptr + maxcount;
 
     switch (pattern[0]) {
@@ -1132,7 +1132,7 @@
             } else {
                 /* general case */
                 LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
+                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
                     state->ptr = ctx->ptr;
                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1218,7 +1218,7 @@
             }
 
             if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
+                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
                 state->ptr != ctx->u.rep->last_ptr) {
                 /* we may have enough matches, but if we can
                    match another item, do so */
@@ -1296,7 +1296,7 @@
             LASTMARK_RESTORE();
 
             if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
+                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
                 RETURN_FAILURE;
 
             ctx->u.rep->count = ctx->count;
@@ -3072,7 +3072,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
@@ -3091,7 +3091,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
@@ -3979,6 +3979,12 @@
         Py_DECREF(x);
     }
 
+    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+    if (x) {
+        PyDict_SetItemString(d, "MAXREPEAT", x);
+        Py_DECREF(x);
+    }
+
     x = PyUnicode_FromString(copyright);
     if (x) {
         PyDict_SetItemString(d, "copyright", x);
diff -r 9c0cd608464e Modules/sre.h
--- a/Modules/sre.h	Thu Jan 31 16:11:04 2013 +0200
+++ b/Modules/sre.h	Thu Jan 31 17:14:59 2013 +0200
@@ -16,6 +16,11 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
+#if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+#else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+#endif
 
 typedef struct {
     PyObject_VAR_HEAD
-------------- next part --------------
diff -r e6cc582cafce Lib/sre_compile.py
--- a/Lib/sre_compile.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/sre_compile.py	Thu Jan 31 17:21:55 2013 +0200
@@ -13,6 +13,7 @@
 import _sre, sys
 import sre_parse
 from sre_constants import *
+from _sre import MAXREPEAT
 
 assert _sre.MAGIC == MAGIC, "SRE module mismatch"
 
diff -r e6cc582cafce Lib/sre_constants.py
--- a/Lib/sre_constants.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/sre_constants.py	Thu Jan 31 17:21:55 2013 +0200
@@ -15,10 +15,6 @@
 
 MAGIC = 20031017
 
-# max code word in this release
-
-MAXREPEAT = 65535
-
 # SRE standard exception (access as sre.error)
 # should this really be here?
 
diff -r e6cc582cafce Lib/sre_parse.py
--- a/Lib/sre_parse.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/sre_parse.py	Thu Jan 31 17:21:55 2013 +0200
@@ -15,6 +15,7 @@
 import sys
 
 from sre_constants import *
+from _sre import MAXREPEAT
 
 SPECIAL_CHARS = ".\\[{()*+?^$|"
 REPEAT_CHARS = "*+?{"
@@ -537,10 +538,18 @@
                     continue
                 if lo:
                     min = int(lo)
+                    if MAXREPEAT <= min <= sys.maxsize:
+                        raise error("the repetition number is too large")
                 if hi:
                     max = int(hi)
-                if max < min:
-                    raise error("bad repeat interval")
+                    if max < min:
+                        raise error("bad repeat interval")
+                    if max >= MAXREPEAT:
+                        if max <= sys.maxsize:
+                            raise error("the repetition number is too large")
+                        max = MAXREPEAT
+                if min > MAXREPEAT:
+                    min = MAXREPEAT
             else:
                 raise error("not supported")
             # figure out which item to repeat
diff -r e6cc582cafce Lib/test/test_re.py
--- a/Lib/test/test_re.py	Thu Jan 31 16:11:47 2013 +0200
+++ b/Lib/test/test_re.py	Thu Jan 31 17:21:55 2013 +0200
@@ -1,4 +1,5 @@
-from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G
+from test.support import verbose, run_unittest, gc_collect, bigmemtest, _2G, \
+        cpython_only
 import io
 import re
 from re import Scanner
@@ -980,6 +981,41 @@
         self.assertEqual(re.findall(r"(?i)(a)\1", "aa \u0100"), ['a'])
         self.assertEqual(re.match(r"(?s).{1,3}", "\u0100\u0100").span(), (0, 2))
 
+    def test_repeat_minmax_overflow(self):
+        # Issue #13169
+        string = "x" * 100000
+        self.assertEqual(re.match(r".{65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{,65535}", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65535,}?", string).span(), (0, 65535))
+        self.assertEqual(re.match(r".{65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{,65536}", string).span(), (0, 65536))
+        self.assertEqual(re.match(r".{65536,}?", string).span(), (0, 65536))
+        # 2**128 should be big enough to overflow both SRE_CODE and Py_ssize_t.
+        self.assertIsNone(re.match(r".{%d}" % 2**128, string))
+        self.assertEqual(re.match(r".{,%d}" % 2**128, string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % 2**128, string))
+        self.assertRaises(re.error, re.compile, r".{%d,%d}" % (2**129, 2**128))
+
+    @cpython_only
+    def test_repeat_minmax_overflow_maxrepeat(self):
+        try:
+            from _sre import MAXREPEAT
+        except ImportError:
+            self.skipTest('requires _sre.MAXREPEAT constant')
+        if MAXREPEAT > sys.maxsize:
+            self.skipTest('requires _sre.MAXREPEAT <= sys.maxsize')
+        # Locals of test_repeat_minmax_overflow are not visible here.
+        string = "x" * 100000
+        self.assertIsNone(re.match(r".{%d}" % (MAXREPEAT - 1), string))
+        self.assertEqual(re.match(r".{,%d}" % (MAXREPEAT - 1), string).span(),
+                         (0, 100000))
+        self.assertIsNone(re.match(r".{%d,}?" % (MAXREPEAT - 1), string))
+        self.assertRaises(re.error, re.compile, r".{%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{,%d}" % MAXREPEAT)
+        self.assertRaises(re.error, re.compile, r".{%d,}?" % MAXREPEAT)
+
+
 def run_re_tests():
     from test.re_tests import tests, SUCCEED, FAIL, SYNTAX_ERROR
     if verbose:
diff -r e6cc582cafce Modules/_sre.c
--- a/Modules/_sre.c	Thu Jan 31 16:11:47 2013 +0200
+++ b/Modules/_sre.c	Thu Jan 31 17:21:55 2013 +0200
@@ -492,7 +492,7 @@
     Py_ssize_t i;
 
     /* adjust end */
-    if (maxcount < (end - ptr) / state->charsize && maxcount != 65535)
+    if (maxcount < (end - ptr) / state->charsize && maxcount != SRE_MAXREPEAT)
         end = ptr + maxcount*state->charsize;
 
     switch (pattern[0]) {
@@ -1109,7 +1109,7 @@
             } else {
                 /* general case */
                 LASTMARK_SAVE();
-                while ((Py_ssize_t)ctx->pattern[2] == 65535
+                while ((Py_ssize_t)ctx->pattern[2] == SRE_MAXREPEAT
                        || ctx->count <= (Py_ssize_t)ctx->pattern[2]) {
                     state->ptr = ctx->ptr;
                     DO_JUMP(JUMP_MIN_REPEAT_ONE,jump_min_repeat_one,
@@ -1195,7 +1195,7 @@
             }
 
             if ((ctx->count < ctx->u.rep->pattern[2] ||
-                ctx->u.rep->pattern[2] == 65535) &&
+                ctx->u.rep->pattern[2] == SRE_MAXREPEAT) &&
                 state->ptr != ctx->u.rep->last_ptr) {
                 /* we may have enough matches, but if we can
                    match another item, do so */
@@ -1273,7 +1273,7 @@
             LASTMARK_RESTORE();
 
             if (ctx->count >= ctx->u.rep->pattern[2]
-                && ctx->u.rep->pattern[2] != 65535)
+                && ctx->u.rep->pattern[2] != SRE_MAXREPEAT)
                 RETURN_FAILURE;
 
             ctx->u.rep->count = ctx->count;
@@ -3037,7 +3037,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-4, groups))
                     FAIL;
@@ -3056,7 +3056,7 @@
                 GET_ARG; max = arg;
                 if (min > max)
                     FAIL;
-                if (max > 65535)
+                if (max > SRE_MAXREPEAT)
                     FAIL;
                 if (!_validate_inner(code, code+skip-3, groups))
                     FAIL;
@@ -3942,6 +3942,12 @@
         Py_DECREF(x);
     }
 
+    x = PyLong_FromUnsignedLong(SRE_MAXREPEAT);
+    if (x) {
+        PyDict_SetItemString(d, "MAXREPEAT", x);
+        Py_DECREF(x);
+    }
+
     x = PyUnicode_FromString(copyright);
     if (x) {
         PyDict_SetItemString(d, "copyright", x);
diff -r e6cc582cafce Modules/sre.h
--- a/Modules/sre.h	Thu Jan 31 16:11:47 2013 +0200
+++ b/Modules/sre.h	Thu Jan 31 17:21:55 2013 +0200
@@ -16,6 +16,11 @@
 /* size of a code word (must be unsigned short or larger, and
    large enough to hold a UCS4 character) */
 #define SRE_CODE Py_UCS4
+#if SIZEOF_SIZE_T > 4
+# define SRE_MAXREPEAT (~(SRE_CODE)0)
+#else
+# define SRE_MAXREPEAT ((SRE_CODE)PY_SSIZE_T_MAX + 1u)
+#endif
 
 typedef struct {
     PyObject_VAR_HEAD


More information about the Python-bugs-list mailing list