[Python-checkins] cpython: #16009: JSON error messages now provide more information. Patch by Serhiy Storchaka.

ezio.melotti python-checkins at python.org
Thu Jan 3 07:44:28 CET 2013


http://hg.python.org/cpython/rev/17b4bf9e9f43
changeset:   81270:17b4bf9e9f43
parent:      81268:d81d4b3059e4
user:        Ezio Melotti <ezio.melotti at gmail.com>
date:        Thu Jan 03 08:44:15 2013 +0200
summary:
  #16009: JSON error messages now provide more information.  Patch by Serhiy Storchaka.

files:
  Lib/json/decoder.py              |  14 +-
  Lib/json/scanner.py              |   4 +-
  Lib/test/json_tests/test_fail.py |  77 ++++++++++++++++++++
  Misc/NEWS                        |   2 +
  Modules/_json.c                  |  48 ++++++------
  5 files changed, 112 insertions(+), 33 deletions(-)


diff --git a/Lib/json/decoder.py b/Lib/json/decoder.py
--- a/Lib/json/decoder.py
+++ b/Lib/json/decoder.py
@@ -188,8 +188,8 @@
 
         try:
             value, end = scan_once(s, end)
-        except StopIteration:
-            raise ValueError(errmsg("Expecting object", s, end))
+        except StopIteration as err:
+            raise ValueError(errmsg("Expecting value", s, err.value)) from None
         pairs_append((key, value))
         try:
             nextchar = s[end]
@@ -232,8 +232,8 @@
     while True:
         try:
             value, end = scan_once(s, end)
-        except StopIteration:
-            raise ValueError(errmsg("Expecting object", s, end))
+        except StopIteration as err:
+            raise ValueError(errmsg("Expecting value", s, err.value)) from None
         _append(value)
         nextchar = s[end:end + 1]
         if nextchar in _ws:
@@ -243,7 +243,7 @@
         if nextchar == ']':
             break
         elif nextchar != ',':
-            raise ValueError(errmsg("Expecting ',' delimiter", s, end))
+            raise ValueError(errmsg("Expecting ',' delimiter", s, end - 1))
         try:
             if s[end] in _ws:
                 end += 1
@@ -358,6 +358,6 @@
         """
         try:
             obj, end = self.scan_once(s, idx)
-        except StopIteration:
-            raise ValueError("No JSON object could be decoded")
+        except StopIteration as err:
+            raise ValueError(errmsg("Expecting value", s, err.value)) from None
         return obj, end
diff --git a/Lib/json/scanner.py b/Lib/json/scanner.py
--- a/Lib/json/scanner.py
+++ b/Lib/json/scanner.py
@@ -29,7 +29,7 @@
         try:
             nextchar = string[idx]
         except IndexError:
-            raise StopIteration
+            raise StopIteration(idx)
 
         if nextchar == '"':
             return parse_string(string, idx + 1, strict)
@@ -60,7 +60,7 @@
         elif nextchar == '-' and string[idx:idx + 9] == '-Infinity':
             return parse_constant('-Infinity'), idx + 9
         else:
-            raise StopIteration
+            raise StopIteration(idx)
 
     def scan_once(string, idx):
         try:
diff --git a/Lib/test/json_tests/test_fail.py b/Lib/test/json_tests/test_fail.py
--- a/Lib/test/json_tests/test_fail.py
+++ b/Lib/test/json_tests/test_fail.py
@@ -1,4 +1,5 @@
 from test.json_tests import PyTest, CTest
+import re
 
 # 2007-10-05
 JSONDOCS = [
@@ -100,6 +101,82 @@
         #This is for python encoder
         self.assertRaises(TypeError, self.dumps, data, indent=True)
 
+    def test_truncated_input(self):
+        test_cases = [
+            ('', 'Expecting value', 0),
+            ('[', 'Expecting value', 1),
+            ('[42', "Expecting ',' delimiter", 3),
+            ('[42,', 'Expecting value', 4),
+            ('["', 'Unterminated string starting at', 1),
+            ('["spam', 'Unterminated string starting at', 1),
+            ('["spam"', "Expecting ',' delimiter", 7),
+            ('["spam",', 'Expecting value', 8),
+            ('{', 'Expecting property name enclosed in double quotes', 1),
+            ('{"', 'Unterminated string starting at', 1),
+            ('{"spam', 'Unterminated string starting at', 1),
+            ('{"spam"', "Expecting ':' delimiter", 7),
+            ('{"spam":', 'Expecting value', 8),
+            ('{"spam":42', "Expecting ',' delimiter", 10),
+            ('{"spam":42,', 'Expecting property name enclosed in double quotes', 11),
+        ]
+        test_cases += [
+            ('"', 'Unterminated string starting at', 0),
+            ('"spam', 'Unterminated string starting at', 0),
+        ]
+        for data, msg, idx in test_cases:
+            self.assertRaisesRegex(ValueError,
+                r'^{0}: line 1 column {1} \(char {1}\)'.format(
+                    re.escape(msg), idx),
+                self.loads, data)
+
+    def test_unexpected_data(self):
+        test_cases = [
+            ('[,', 'Expecting value', 1),
+            ('{"spam":[}', 'Expecting value', 9),
+            ('[42:', "Expecting ',' delimiter", 3),
+            ('[42 "spam"', "Expecting ',' delimiter", 4),
+            ('[42,]', 'Expecting value', 4),
+            ('{"spam":[42}', "Expecting ',' delimiter", 11),
+            ('["]', 'Unterminated string starting at', 1),
+            ('["spam":', "Expecting ',' delimiter", 7),
+            ('["spam",]', 'Expecting value', 8),
+            ('{:', 'Expecting property name enclosed in double quotes', 1),
+            ('{,', 'Expecting property name enclosed in double quotes', 1),
+            ('{42', 'Expecting property name enclosed in double quotes', 1),
+            ('[{]', 'Expecting property name enclosed in double quotes', 2),
+            ('{"spam",', "Expecting ':' delimiter", 7),
+            ('{"spam"}', "Expecting ':' delimiter", 7),
+            ('[{"spam"]', "Expecting ':' delimiter", 8),
+            ('{"spam":}', 'Expecting value', 8),
+            ('[{"spam":]', 'Expecting value', 9),
+            ('{"spam":42 "ham"', "Expecting ',' delimiter", 11),
+            ('[{"spam":42]', "Expecting ',' delimiter", 11),
+            ('{"spam":42,}', 'Expecting property name enclosed in double quotes', 11),
+        ]
+        for data, msg, idx in test_cases:
+            self.assertRaisesRegex(ValueError,
+                r'^{0}: line 1 column {1} \(char {1}\)'.format(
+                    re.escape(msg), idx),
+                self.loads, data)
+
+    def test_extra_data(self):
+        test_cases = [
+            ('[]]', 'Extra data', 2),
+            ('{}}', 'Extra data', 2),
+            ('[],[]', 'Extra data', 2),
+            ('{},{}', 'Extra data', 2),
+        ]
+        test_cases += [
+            ('42,"spam"', 'Extra data', 2),
+            ('"spam",42', 'Extra data', 6),
+        ]
+        for data, msg, idx in test_cases:
+            self.assertRaisesRegex(ValueError,
+                r'^{0}: line 1 column {1} - line 1 column {2}'
+                r' \(char {1} - {2}\)'.format(
+                    re.escape(msg), idx, len(data)),
+                self.loads, data)
+
 
 class TestPyFail(TestFail, PyTest): pass
 class TestCFail(TestFail, CTest): pass
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -201,6 +201,8 @@
 Library
 -------
 
+- Issue #16009: JSON error messages now provide more information.
+
 - Issue #16828: Fix error incorrectly raised by bz2.compress(b'') and
   bz2.BZ2Compressor.compress(b''). Initial patch by Martin Packman.
 
diff --git a/Modules/_json.c b/Modules/_json.c
--- a/Modules/_json.c
+++ b/Modules/_json.c
@@ -237,6 +237,16 @@
     }
 }
 
+static void
+raise_stop_iteration(Py_ssize_t idx)
+{
+    PyObject *value = PyLong_FromSsize_t(idx);
+    if (value != NULL) {
+        PyErr_SetObject(PyExc_StopIteration, value);
+        Py_DECREF(value);
+    }
+}
+
 static PyObject *
 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) {
     /* return (rval, idx) tuple, stealing reference to rval */
@@ -306,7 +316,7 @@
     buf = PyUnicode_DATA(pystr);
     kind = PyUnicode_KIND(pystr);
 
-    if (end < 0 || len <= end) {
+    if (end < 0 || len < end) {
         PyErr_SetString(PyExc_ValueError, "end is out of bounds");
         goto bail;
     }
@@ -604,12 +614,12 @@
     while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind,str, idx))) idx++;
 
     /* only loop if the object is non-empty */
-    if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != '}') {
-        while (idx <= end_idx) {
+    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
+        while (1) {
             PyObject *memokey;
 
             /* read key */
-            if (PyUnicode_READ(kind, str, idx) != '"') {
+            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '"') {
                 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx);
                 goto bail;
             }
@@ -666,11 +676,9 @@
             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
 
             /* bail if the object is closed or we didn't get the , delimiter */
-            if (idx > end_idx) break;
-            if (PyUnicode_READ(kind, str, idx) == '}') {
+            if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == '}')
                 break;
-            }
-            else if (PyUnicode_READ(kind, str, idx) != ',') {
+            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
                 goto bail;
             }
@@ -681,12 +689,6 @@
         }
     }
 
-    /* verify that idx < end_idx, str[idx] should be '}' */
-    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != '}') {
-        raise_errmsg("Expecting object", pystr, end_idx);
-        goto bail;
-    }
-
     *next_idx_ptr = idx + 1;
 
     if (has_pairs_hook) {
@@ -738,8 +740,8 @@
     while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
 
     /* only loop if the array is non-empty */
-    if (idx <= end_idx && PyUnicode_READ(kind, str, idx) != ']') {
-        while (idx <= end_idx) {
+    if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
+        while (1) {
 
             /* read any JSON term  */
             val = scan_once_unicode(s, pystr, idx, &next_idx);
@@ -756,11 +758,9 @@
             while (idx <= end_idx && IS_WHITESPACE(PyUnicode_READ(kind, str, idx))) idx++;
 
             /* bail if the array is closed or we didn't get the , delimiter */
-            if (idx > end_idx) break;
-            if (PyUnicode_READ(kind, str, idx) == ']') {
+            if (idx <= end_idx && PyUnicode_READ(kind, str, idx) == ']')
                 break;
-            }
-            else if (PyUnicode_READ(kind, str, idx) != ',') {
+            if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ',') {
                 raise_errmsg("Expecting ',' delimiter", pystr, idx);
                 goto bail;
             }
@@ -773,7 +773,7 @@
 
     /* verify that idx < end_idx, PyUnicode_READ(kind, str, idx) should be ']' */
     if (idx > end_idx || PyUnicode_READ(kind, str, idx) != ']') {
-        raise_errmsg("Expecting object", pystr, end_idx);
+        raise_errmsg("Expecting value", pystr, end_idx);
         goto bail;
     }
     *next_idx_ptr = idx + 1;
@@ -841,7 +841,7 @@
     if (PyUnicode_READ(kind, str, idx) == '-') {
         idx++;
         if (idx > end_idx) {
-            PyErr_SetNone(PyExc_StopIteration);
+            raise_stop_iteration(start);
             return NULL;
         }
     }
@@ -857,7 +857,7 @@
     }
     /* no integer digits, error */
     else {
-        PyErr_SetNone(PyExc_StopIteration);
+        raise_stop_iteration(start);
         return NULL;
     }
 
@@ -950,7 +950,7 @@
     length = PyUnicode_GET_LENGTH(pystr);
 
     if (idx >= length) {
-        PyErr_SetNone(PyExc_StopIteration);
+        raise_stop_iteration(idx);
         return NULL;
     }
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list