[issue16335] Integer overflow in unicode-escape decoder

Fri Nov 9 15:41:48 CET 2012

Serhiy Storchaka added the comment:

Here are patches for the different Python versions, each with a test added. Victor, now you can try it on 12 GB. Unfortunately, I can't run the tests myself.

----------
Added file: http://bugs.python.org/file27933/decode_unicode_escape_overflow-3.3.patch
Added file: http://bugs.python.org/file27934/decode_unicode_escape_overflow-3.2.patch
Added file: http://bugs.python.org/file27935/decode_unicode_escape_overflow-2.7.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue16335>
_______________________________________
-------------- next part --------------
diff -r d170844a363f Lib/test/test_ucn.py

--- a/Lib/test/test_ucn.py	Fri Nov 09 01:09:27 2012 +0200
+++ b/Lib/test/test_ucn.py	Fri Nov 09 16:36:22 2012 +0200
@@ -9,6 +9,7 @@
 
 import unittest
 import unicodedata
+import _testcapi
 
 from test import support
 from http.client import HTTPException
@@ -215,6 +216,20 @@
             str, b"\\NSPACE", 'unicode-escape', 'strict'
         )
 
+    @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX,
+                         "needs UINT_MAX < SIZE_MAX")
+    @support.bigmemtest(size=_testcapi.UINT_MAX + 1 + len(b'\\N{SPACE}') + 1,
+                        memuse=1 + 1, dry_run=False)
+    def test_issue16335(self, size):
+        # very very long bogus character name
+        x = b'\\N{SPACE' + b'x' * (_testcapi.UINT_MAX + 1) + b'}'
+        self.assertEqual(len(x), len(b'\\N{SPACE}') + (_testcapi.UINT_MAX + 1))
+        self.assertRaisesRegex(UnicodeError,
+            'unknown Unicode character name',
+            x.decode, 'unicode-escape'
+        )
+
+
 def test_main():
     support.run_unittest(UnicodeNamesTest)
 
diff -r d170844a363f Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri Nov 09 01:09:27 2012 +0200
+++ b/Objects/unicodeobject.c	Fri Nov 09 16:36:22 2012 +0200
@@ -5561,7 +5561,8 @@
                     /* found a name.  look it up in the unicode database */
                     message = "unknown Unicode character name";
                     s++;
-                    if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1),
+                    if (s - start - 1 <= INT_MAX &&
+                        ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1),
                                               &chr, 0))
                         goto store;
                 }
-------------- next part --------------
diff -r bb39ca6bcd7a Lib/test/test_ucn.py
--- a/Lib/test/test_ucn.py	Fri Nov 09 01:08:25 2012 +0200
+++ b/Lib/test/test_ucn.py	Fri Nov 09 16:36:32 2012 +0200
@@ -8,6 +8,7 @@
 """#"
 
 import unittest
+import _testcapi
 
 from test import support
 
@@ -141,6 +142,20 @@
             str, b"\\NSPACE", 'unicode-escape', 'strict'
         )
 
+    @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX,
+                         "needs UINT_MAX < SIZE_MAX")
+    @support.bigmemtest(size=_testcapi.UINT_MAX + 1 + len(b'\\N{SPACE}') + 1,
+                        memuse=1 + 4, dry_run=False)
+    def test_issue16335(self, size):
+        # very very long bogus character name
+        x = b'\\N{SPACE' + b'x' * (_testcapi.UINT_MAX + 1) + b'}'
+        self.assertEqual(len(x), len(b'\\N{SPACE}') + (_testcapi.UINT_MAX + 1))
+        self.assertRaisesRegex(UnicodeError,
+            'unknown Unicode character name',
+            x.decode, 'unicode-escape'
+        )
+
+
 def test_main():
     support.run_unittest(UnicodeNamesTest)
 
diff -r bb39ca6bcd7a Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri Nov 09 01:08:25 2012 +0200
+++ b/Objects/unicodeobject.c	Fri Nov 09 16:36:32 2012 +0200
@@ -3919,7 +3919,8 @@
                     /* found a name.  look it up in the unicode database */
                     message = "unknown Unicode character name";
                     s++;
-                    if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), &chr))
+                    if (s - start - 1 <= INT_MAX &&
+                        ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), &chr))
                         goto store;
                 }
             }
-------------- next part --------------
diff -r 8b181c75792f Lib/test/test_ucn.py
--- a/Lib/test/test_ucn.py	Fri Nov 09 01:03:44 2012 +0200
+++ b/Lib/test/test_ucn.py	Fri Nov 09 16:37:11 2012 +0200
@@ -8,6 +8,7 @@
 """#"
 
 import unittest
+import _testcapi
 
 from test import test_support
 
@@ -137,6 +138,20 @@
             unicode, "\\NSPACE", 'unicode-escape', 'strict'
         )
 
+    @unittest.skipUnless(_testcapi.INT_MAX < _testcapi.PY_SSIZE_T_MAX,
+                         "needs UINT_MAX < SIZE_MAX")
+    @test_support.bigmemtest(minsize=_testcapi.UINT_MAX + 1 + len(b'\\N{SPACE}') + 1,
+                             memuse=1 + 4)
+    def test_issue16335(self, size):
+        # very very long bogus character name
+        x = b'\\N{SPACE' + b'x' * (_testcapi.UINT_MAX + 1) + b'}'
+        self.assertEqual(len(x), len(b'\\N{SPACE}') + (_testcapi.UINT_MAX + 1))
+        self.assertRaisesRegex(UnicodeError,
+            'unknown Unicode character name',
+            x.decode, 'unicode-escape'
+        )
+
+
 def test_main():
     test_support.run_unittest(UnicodeNamesTest)
 
diff -r 8b181c75792f Objects/unicodeobject.c
--- a/Objects/unicodeobject.c	Fri Nov 09 01:03:44 2012 +0200
+++ b/Objects/unicodeobject.c	Fri Nov 09 16:37:11 2012 +0200
@@ -2896,7 +2896,8 @@
                     /* found a name.  look it up in the unicode database */
                     message = "unknown Unicode character name";
                     s++;
-                    if (ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), &chr))
+                    if (s - start - 1 <= INT_MAX &&
+                        ucnhash_CAPI->getcode(NULL, start, (int)(s-start-1), &chr))
                         goto store;
                 }
             }