[pypy-commit] pypy py3.5: merge default
cfbolz
pypy.commits at gmail.com
Tue Apr 10 04:44:22 EDT 2018
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: py3.5
Changeset: r94295:3d2434de5907
Date: 2018-04-10 10:36 +0200
http://bitbucket.org/pypy/pypy/changeset/3d2434de5907/
Log: merge default
diff --git a/lib-python/2.7/test/test_eof.py b/lib-python/2.7/test/test_eof.py
--- a/lib-python/2.7/test/test_eof.py
+++ b/lib-python/2.7/test/test_eof.py
@@ -5,7 +5,7 @@
class EOFTestCase(unittest.TestCase):
def test_EOFC(self):
- expect = "EOL while scanning string literal (<string>, line 1)"
+ expect = "end of line (EOL) while scanning string literal (<string>, line 1)"
try:
eval("""'this is a test\
""")
@@ -15,7 +15,7 @@
raise test_support.TestFailed
def test_EOFS(self):
- expect = ("EOF while scanning triple-quoted string literal "
+ expect = ("end of file (EOF) while scanning triple-quoted string literal "
"(<string>, line 1)")
try:
eval("""'''this is a test""")
diff --git a/lib-python/2.7/test/test_traceback.py b/lib-python/2.7/test/test_traceback.py
--- a/lib-python/2.7/test/test_traceback.py
+++ b/lib-python/2.7/test/test_traceback.py
@@ -123,10 +123,7 @@
self.assertEqual(len(err), 4)
self.assertEqual(err[1].strip(), "print(2)")
self.assertIn("^", err[2])
- if check_impl_detail():
- self.assertEqual(err[1].find("p"), err[2].find("^"))
- if check_impl_detail(pypy=True):
- self.assertEqual(err[1].find("2)") + 1, err[2].find("^"))
+ self.assertEqual(err[1].find("p"), err[2].find("^"))
def test_base_exception(self):
# Test that exceptions derived from BaseException are formatted right
diff --git a/pypy/doc/whatsnew-head.rst b/pypy/doc/whatsnew-head.rst
--- a/pypy/doc/whatsnew-head.rst
+++ b/pypy/doc/whatsnew-head.rst
@@ -10,3 +10,9 @@
Fix for python-level classes that inherit from C-API types, previously the
`w_obj` was not necessarily preserved throughout the lifetime of the `pyobj`
which led to cases where instance attributes were lost. Fixes issue #2793
+
+
+.. branch: pyparser-improvements-2
+
+Improve line offsets that are reported by SyntaxError. Improve error messages
+for a few situations, including mismatched parentheses.
diff --git a/pypy/interpreter/pyparser/error.py b/pypy/interpreter/pyparser/error.py
--- a/pypy/interpreter/pyparser/error.py
+++ b/pypy/interpreter/pyparser/error.py
@@ -6,6 +6,7 @@
lastlineno=0):
self.msg = msg
self.lineno = lineno
+ # NB: offset is a 1-based index!
self.offset = offset
self.text = text
self.filename = filename
diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -199,6 +199,7 @@
self.token_type = token_type
self.value = value
self.lineno = lineno
+ # this is a 0-based index
self.column = column
self.line = line
self.expected = expected
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -232,7 +232,9 @@
if e.expected_str is not None:
msg += " (expected '%s')" % e.expected_str
- raise new_err(msg, e.lineno, e.column, e.line,
+ # parser.ParseError(...).column is 0-based, but the offsets in the
+ # exceptions in the error module are 1-based, hence the '+ 1'
+ raise new_err(msg, e.lineno, e.column + 1, e.line,
compile_info.filename)
else:
tree = self.root
diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -110,7 +110,7 @@
logical line; continuation lines are included.
"""
token_list = []
- lnum = parenlev = continued = 0
+ lnum = continued = 0
namechars = NAMECHARS
numchars = NUMCHARS
contstr, needcont = '', 0
@@ -118,7 +118,7 @@
indents = [0]
altindents = [0]
last_comment = ''
- parenlevstart = (0, 0, "")
+ parenstack = []
async_def = False
async_def_nl = False
async_def_indent = 0
@@ -138,7 +138,7 @@
if contstr:
if not line:
raise TokenError(
- "EOF while scanning triple-quoted string literal",
+ "end of file (EOF) while scanning triple-quoted string literal",
strstart[2], strstart[0], strstart[1]+1,
token_list, lnum-1)
endmatch = endDFA.recognize(line)
@@ -164,7 +164,7 @@
contline = contline + line
continue
- elif parenlev == 0 and not continued: # new statement
+ elif not parenstack and not continued: # new statement
if not line: break
column = 0
altcolumn = 0
@@ -204,13 +204,13 @@
last_comment = ''
else:
while column < indents[-1]:
- indents = indents[:-1]
- altindents = altindents[:-1]
+ indents.pop()
+ altindents.pop()
token_list.append((tokens.DEDENT, '', lnum, pos, line))
last_comment = ''
if column != indents[-1]:
err = "unindent does not match any outer indentation level"
- raise TokenIndentationError(err, line, lnum, 0, token_list)
+ raise TokenIndentationError(err, line, lnum, column+1, token_list)
if altcolumn != altindents[-1]:
raise TabError(lnum, pos, line)
if async_def_nl and async_def_indent >= indents[-1]:
@@ -220,12 +220,12 @@
else: # continued statement
if not line:
- if parenlev > 0:
- lnum1, start1, line1 = parenlevstart
+ if parenstack:
+ _, lnum1, start1, line1 = parenstack[0]
raise TokenError("parenthesis is never closed", line1,
lnum1, start1 + 1, token_list, lnum)
- raise TokenError("EOF in multi-line statement", line,
- lnum, 0, token_list)
+ raise TokenError("end of file (EOF) in multi-line statement", line,
+ lnum, 0, token_list) # XXX why is the offset 0 here?
continued = 0
while pos < max:
@@ -249,7 +249,7 @@
token_list.append((tokens.NUMBER, token, lnum, start, line))
last_comment = ''
elif initial in '\r\n':
- if parenlev <= 0:
+ if not parenstack:
if async_def:
async_def_nl = True
tok = (tokens.NEWLINE, last_comment, lnum, start, line)
@@ -331,14 +331,22 @@
continued = 1
else:
if initial in '([{':
- if parenlev == 0:
- parenlevstart = (lnum, start, line)
- parenlev = parenlev + 1
+ parenstack.append((initial, lnum, start, line))
elif initial in ')]}':
- parenlev = parenlev - 1
- if parenlev < 0:
+ if not parenstack:
raise TokenError("unmatched '%s'" % initial, line,
lnum, start + 1, token_list)
+ opening, lnum1, start1, line1 = parenstack.pop()
+ if not ((opening == "(" and initial == ")") or
+ (opening == "[" and initial == "]") or
+ (opening == "{" and initial == "}")):
+ msg = "closing parenthesis '%s' does not match opening parenthesis '%s'" % (
+ initial, opening)
+
+ if lnum1 != lnum:
+ msg += " on line " + str(lnum1)
+ raise TokenError(
+ msg, line, lnum, start + 1, token_list)
if token in python_opmap:
punct = python_opmap[token]
else:
@@ -350,7 +358,7 @@
if start < 0:
start = pos
if start<max and line[start] in single_quoted:
- raise TokenError("EOL while scanning string literal",
+ raise TokenError("end of line (EOL) while scanning string literal",
line, lnum, start+1, token_list)
tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line)
token_list.append(tok)
diff --git a/pypy/interpreter/pyparser/test/targetparse.py b/pypy/interpreter/pyparser/test/targetparse.py
--- a/pypy/interpreter/pyparser/test/targetparse.py
+++ b/pypy/interpreter/pyparser/test/targetparse.py
@@ -8,25 +8,36 @@
-with file("../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py") as f:
- s = f.read()
-
class FakeSpace(object):
pass
fakespace = FakeSpace()
-def bench(title):
+def bench(fn, s):
a = time.clock()
info = pyparse.CompileInfo("<string>", "exec")
parser = pyparse.PythonParser(fakespace)
tree = parser._parse(s, info)
b = time.clock()
- print title, (b-a)
+ print fn, (b-a)
def entry_point(argv):
- bench("foo")
+ if len(argv) == 2:
+ fn = argv[1]
+ else:
+ fn = "../../../../rpython/rlib/unicodedata/unicodedb_5_2_0.py"
+ fd = os.open(fn, os.O_RDONLY, 0777)
+ res = []
+ while True:
+ s = os.read(fd, 4096)
+ if not s:
+ break
+ res.append(s)
+ os.close(fd)
+ s = "".join(res)
+ print len(s)
+ bench(fn, s)
return 0
diff --git a/pypy/interpreter/pyparser/test/test_pyparse.py b/pypy/interpreter/pyparser/test/test_pyparse.py
--- a/pypy/interpreter/pyparser/test/test_pyparse.py
+++ b/pypy/interpreter/pyparser/test/test_pyparse.py
@@ -45,14 +45,14 @@
exc = py.test.raises(SyntaxError, parse, "name another for").value
assert exc.msg == "invalid syntax"
assert exc.lineno == 1
- assert exc.offset == 5
+ assert exc.offset == 6
assert exc.text.startswith("name another for")
exc = py.test.raises(SyntaxError, parse, "x = \"blah\n\n\n").value
- assert exc.msg == "EOL while scanning string literal"
+ assert exc.msg == "end of line (EOL) while scanning string literal"
assert exc.lineno == 1
assert exc.offset == 5
exc = py.test.raises(SyntaxError, parse, "x = '''\n\n\n").value
- assert exc.msg == "EOF while scanning triple-quoted string literal"
+ assert exc.msg == "end of file (EOF) while scanning triple-quoted string literal"
assert exc.lineno == 1
assert exc.offset == 5
assert exc.lastlineno == 3
@@ -81,7 +81,7 @@
assert exc.msg == "expected an indented block"
assert exc.lineno == 3
assert exc.text.startswith("pass")
- assert exc.offset == 0
+ assert exc.offset == 1
input = "hi\n indented"
exc = py.test.raises(IndentationError, parse, input).value
assert exc.msg == "unexpected indent"
@@ -89,6 +89,7 @@
exc = py.test.raises(IndentationError, parse, input).value
assert exc.msg == "unindent does not match any outer indentation level"
assert exc.lineno == 3
+ assert exc.offset == 3
def test_taberror(self):
src = """
diff --git a/pypy/interpreter/pyparser/test/test_pytokenizer.py b/pypy/interpreter/pyparser/test/test_pytokenizer.py
new file mode 100644
--- /dev/null
+++ b/pypy/interpreter/pyparser/test/test_pytokenizer.py
@@ -0,0 +1,66 @@
+import pytest
+from pypy.interpreter.pyparser import pytokenizer
+from pypy.interpreter.pyparser.pygram import tokens
+from pypy.interpreter.pyparser.error import TokenError
+
+def tokenize(s):
+ return pytokenizer.generate_tokens(s.splitlines(True) + ["\n"], 0)
+
+def check_token_error(s, msg=None, pos=-1, line=-1):
+ error = pytest.raises(TokenError, tokenize, s)
+ if msg is not None:
+ assert error.value.msg == msg
+ if pos != -1:
+ assert error.value.offset == pos
+ if line != -1:
+ assert error.value.lineno == line
+
+
+class TestTokenizer(object):
+
+ def test_simple(self):
+ line = "a+1"
+ tks = tokenize(line)
+ assert tks == [
+ (tokens.NAME, 'a', 1, 0, line),
+ (tokens.PLUS, '+', 1, 1, line),
+ (tokens.NUMBER, '1', 1, 2, line),
+ (tokens.NEWLINE, '', 2, 0, '\n'),
+ (tokens.NEWLINE, '', 2, 0, '\n'),
+ (tokens.ENDMARKER, '', 2, 0, ''),
+ ]
+
+ def test_error_parenthesis(self):
+ for paren in "([{":
+ check_token_error(paren + "1 + 2",
+ "parenthesis is never closed",
+ 1)
+
+ for paren in ")]}":
+ check_token_error("1 + 2" + paren,
+ "unmatched '%s'" % (paren, ),
+ 6)
+
+ for i, opening in enumerate("([{"):
+ for j, closing in enumerate(")]}"):
+ if i == j:
+ continue
+ check_token_error(opening + "1\n" + closing,
+ "closing parenthesis '%s' does not match opening parenthesis '%s' on line 1" % (closing, opening),
+ pos=1, line=2)
+ check_token_error(opening + "1" + closing,
+ "closing parenthesis '%s' does not match opening parenthesis '%s'" % (closing, opening),
+ pos=3, line=1)
+ check_token_error(opening + closing,
+ "closing parenthesis '%s' does not match opening parenthesis '%s'" % (closing, opening),
+ pos=2, line=1)
+
+
+ def test_unknown_char(self):
+ check_token_error("?", "Unknown character", 1)
+
+ def test_eol_string(self):
+ check_token_error("x = 'a", pos=5, line=1)
+
+ def test_eof_triple_quoted(self):
+ check_token_error("'''", pos=1, line=1)
diff --git a/pypy/interpreter/test/test_compiler.py b/pypy/interpreter/test/test_compiler.py
--- a/pypy/interpreter/test/test_compiler.py
+++ b/pypy/interpreter/test/test_compiler.py
@@ -77,7 +77,7 @@
""")
assert self.space.unwrap(w_args) == (
'unindent does not match any outer indentation level',
- ('<string>', 3, 0, ' y\n'))
+ ('<string>', 3, 2, ' y\n'))
def test_getcodeflags(self):
code = self.compiler.compile('from __future__ import division\n',
diff --git a/pypy/interpreter/test/test_syntax.py b/pypy/interpreter/test/test_syntax.py
--- a/pypy/interpreter/test/test_syntax.py
+++ b/pypy/interpreter/test/test_syntax.py
@@ -715,8 +715,7 @@
except SyntaxError as e:
assert e.lineno == 4
assert e.text.endswith('a b c d e\n')
- b_pos = e.text.index('b')
- assert e.offset in (b_pos, b_pos+1) # b_pos in pypy, b_pos+1 in CPython.
+ assert e.offset == e.text.index('b') + 1 # offset is 1-based
else:
raise Exception("no SyntaxError??")
More information about the pypy-commit
mailing list