[pypy-commit] pypy pyparser-improvements-3: use Token class in pytokenizer too
cfbolz
pypy.commits at gmail.com
Sat Apr 14 05:21:22 EDT 2018
Author: Carl Friedrich Bolz-Tereick <cfbolz at gmx.de>
Branch: pyparser-improvements-3
Changeset: r94317:d965ef8c054e
Date: 2018-04-14 11:06 +0200
http://bitbucket.org/pypy/pypy/changeset/d965ef8c054e/
Log: use Token class in pytokenizer too
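[Editor's note] This changeset replaces the 5-tuples (type, value,
lineno, column, line) that the tokenizer used to emit with instances of
the existing Token class. For context, a minimal sketch of that class
as the diff below uses it (attribute names and constructor order are
taken from the hunks; the real class in parser.py may carry more):

    class Token(object):
        def __init__(self, token_type, value, lineno, column, line):
            self.token_type = token_type  # grammar token id, e.g. tokens.NAME
            self.value = value            # matched source text
            self.lineno = lineno          # 1-based line number (per the tests)
            self.column = column          # 0-based column of the match
            self.line = line              # full source line, kept for error reporting

        def __repr__(self):
            return "Token(%s, %s)" % (self.token_type, self.value)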
diff --git a/pypy/interpreter/pyparser/future.py b/pypy/interpreter/pyparser/future.py
--- a/pypy/interpreter/pyparser/future.py
+++ b/pypy/interpreter/pyparser/future.py
@@ -43,7 +43,7 @@
self.tok = self.tokens[index]
def skip(self, n):
- if self.tok[0] == n:
+ if self.tok.token_type == n:
self.next()
return True
else:
@@ -51,7 +51,7 @@
def skip_name(self, name):
from pypy.interpreter.pyparser import pygram
- if self.tok[0] == pygram.tokens.NAME and self.tok[1] == name:
+ if self.tok.token_type == pygram.tokens.NAME and self.tok.value == name:
self.next()
return True
else:
@@ -59,8 +59,8 @@
def next_feature_name(self):
from pypy.interpreter.pyparser import pygram
- if self.tok[0] == pygram.tokens.NAME:
- name = self.tok[1]
+ if self.tok.token_type == pygram.tokens.NAME:
+ name = self.tok.value
self.next()
if self.skip_name("as"):
self.skip(pygram.tokens.NAME)
@@ -101,7 +101,7 @@
# somewhere inside the last __future__ import statement
# (at the start would be fine too, but it's easier to grab a
# random position inside)
- last_position = (it.tok[2], it.tok[3])
+ last_position = (it.tok.lineno, it.tok.column)
result |= future_flags.get_compiler_feature(it.next_feature_name())
while it.skip(pygram.tokens.COMMA):
result |= future_flags.get_compiler_feature(it.next_feature_name())
diff --git a/pypy/interpreter/pyparser/parser.py b/pypy/interpreter/pyparser/parser.py
--- a/pypy/interpreter/pyparser/parser.py
+++ b/pypy/interpreter/pyparser/parser.py
@@ -78,6 +78,19 @@
def __repr__(self):
return "Token(%s, %s)" % (self.token_type, self.value)
+ def __eq__(self, other):
+ # for tests
+ return (
+ self.token_type == other.token_type and
+ self.value == other.value and
+ self.lineno == other.lineno and
+ self.column == other.column and
+ self.line == other.line
+ )
+
+ def __ne__(self, other):
+ return not self == other
+
class Node(object):
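[Editor's note] The new __eq__/__ne__ pair exists, as the "# for tests"
comment says, so tests can compare whole token lists structurally (the
test diff at the end of this changeset does exactly that). A minimal
illustration of the comparison semantics:

    assert Token(tokens.NAME, 'a', 1, 0, "a+1") == \
           Token(tokens.NAME, 'a', 1, 0, "a+1")
    # any differing field makes the tokens unequal
    assert Token(tokens.NAME, 'a', 1, 0, "a+1") != \
           Token(tokens.NAME, 'b', 1, 0, "a+1")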
diff --git a/pypy/interpreter/pyparser/pyparse.py b/pypy/interpreter/pyparser/pyparse.py
--- a/pypy/interpreter/pyparser/pyparse.py
+++ b/pypy/interpreter/pyparser/pyparse.py
@@ -147,7 +147,6 @@
flags &= ~consts.PyCF_DONT_IMPLY_DEDENT
self.prepare(_targets[compile_info.mode])
- tp = 0
try:
try:
# Note: we no longer pass the CO_FUTURE_* to the tokenizer,
@@ -165,8 +164,8 @@
else:
self.grammar = pygram.python_grammar
- for tp, value, lineno, column, line in tokens:
- if self.add_token(parser.Token(tp, value, lineno, column, line)):
+ for token in tokens:
+ if self.add_token(token):
break
except error.TokenError as e:
e.filename = compile_info.filename
@@ -178,7 +177,7 @@
# Catch parse errors, pretty them up and reraise them as a
# SyntaxError.
new_err = error.IndentationError
- if tp == pygram.tokens.INDENT:
+ if token.token_type == pygram.tokens.INDENT:
msg = "unexpected indent"
elif e.expected == pygram.tokens.INDENT:
msg = "expected an indented block"
diff --git a/pypy/interpreter/pyparser/pytokenizer.py b/pypy/interpreter/pyparser/pytokenizer.py
--- a/pypy/interpreter/pyparser/pytokenizer.py
+++ b/pypy/interpreter/pyparser/pytokenizer.py
@@ -1,4 +1,5 @@
from pypy.interpreter.pyparser import automata
+from pypy.interpreter.pyparser.parser import Token
from pypy.interpreter.pyparser.pygram import tokens
from pypy.interpreter.pyparser.pytoken import python_opmap
from pypy.interpreter.pyparser.error import TokenError, TokenIndentationError
@@ -103,7 +104,7 @@
endmatch = endDFA.recognize(line)
if endmatch >= 0:
pos = end = endmatch
- tok = (tokens.STRING, contstr + line[:end], strstart[0],
+ tok = Token(tokens.STRING, contstr + line[:end], strstart[0],
strstart[1], line)
token_list.append(tok)
last_comment = ''
@@ -111,7 +112,7 @@
contline = None
elif (needcont and not line.endswith('\\\n') and
not line.endswith('\\\r\n')):
- tok = (tokens.ERRORTOKEN, contstr + line, strstart[0],
+ tok = Token(tokens.ERRORTOKEN, contstr + line, strstart[0],
strstart[1], line)
token_list.append(tok)
last_comment = ''
@@ -140,11 +141,11 @@
if column > indents[-1]: # count indents or dedents
indents.append(column)
- token_list.append((tokens.INDENT, line[:pos], lnum, 0, line))
+ token_list.append(Token(tokens.INDENT, line[:pos], lnum, 0, line))
last_comment = ''
while column < indents[-1]:
indents.pop()
- token_list.append((tokens.DEDENT, '', lnum, pos, line))
+ token_list.append(Token(tokens.DEDENT, '', lnum, pos, line))
last_comment = ''
if column != indents[-1]:
err = "unindent does not match any outer indentation level"
@@ -177,11 +178,11 @@
token, initial = line[start:end], line[start]
if initial in numchars or \
(initial == '.' and token != '.'): # ordinary number
- token_list.append((tokens.NUMBER, token, lnum, start, line))
+ token_list.append(Token(tokens.NUMBER, token, lnum, start, line))
last_comment = ''
elif initial in '\r\n':
if not parenstack:
- tok = (tokens.NEWLINE, last_comment, lnum, start, line)
+ tok = Token(tokens.NEWLINE, last_comment, lnum, start, line)
token_list.append(tok)
last_comment = ''
elif initial == '#':
@@ -193,7 +194,7 @@
if endmatch >= 0: # all on one line
pos = endmatch
token = line[start:pos]
- tok = (tokens.STRING, token, lnum, start, line)
+ tok = Token(tokens.STRING, token, lnum, start, line)
token_list.append(tok)
last_comment = ''
else:
@@ -212,11 +213,11 @@
contline = line
break
else: # ordinary string
- tok = (tokens.STRING, token, lnum, start, line)
+ tok = Token(tokens.STRING, token, lnum, start, line)
token_list.append(tok)
last_comment = ''
elif initial in namechars: # ordinary name
- token_list.append((tokens.NAME, token, lnum, start, line))
+ token_list.append(Token(tokens.NAME, token, lnum, start, line))
last_comment = ''
elif initial == '\\': # continued stmt
continued = 1
@@ -242,7 +243,7 @@
punct = python_opmap[token]
else:
punct = tokens.OP
- token_list.append((punct, token, lnum, start, line))
+ token_list.append(Token(punct, token, lnum, start, line))
last_comment = ''
else:
start = whiteSpaceDFA.recognize(line, pos)
@@ -251,22 +252,22 @@
if start<max and line[start] in single_quoted:
raise TokenError("end of line (EOL) while scanning string literal",
line, lnum, start+1, token_list)
- tok = (tokens.ERRORTOKEN, line[pos], lnum, pos, line)
+ tok = Token(tokens.ERRORTOKEN, line[pos], lnum, pos, line)
token_list.append(tok)
last_comment = ''
pos = pos + 1
lnum -= 1
if not (flags & consts.PyCF_DONT_IMPLY_DEDENT):
- if token_list and token_list[-1][0] != tokens.NEWLINE:
- tok = (tokens.NEWLINE, '', lnum, 0, '\n')
+ if token_list and token_list[-1].token_type != tokens.NEWLINE:
+ tok = Token(tokens.NEWLINE, '', lnum, 0, '\n')
token_list.append(tok)
for indent in indents[1:]: # pop remaining indent levels
- token_list.append((tokens.DEDENT, '', lnum, pos, line))
- tok = (tokens.NEWLINE, '', lnum, 0, '\n')
+ token_list.append(Token(tokens.DEDENT, '', lnum, pos, line))
+ tok = Token(tokens.NEWLINE, '', lnum, 0, '\n')
token_list.append(tok)
- token_list.append((tokens.ENDMARKER, '', lnum, pos, line))
+ token_list.append(Token(tokens.ENDMARKER, '', lnum, pos, line))
return token_list
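[Editor's note] After this change the tokenizer's output is consumed by
attribute access instead of tuple indexing. A hypothetical interactive
check, assuming the generate_tokens(lines, flags) call shape that the
test helper below appears to wrap:

    from pypy.interpreter.pyparser import pytokenizer
    from pypy.interpreter.pyparser.pygram import tokens

    # a list of source lines plus flags; 0 means no special compile flags
    tks = pytokenizer.generate_tokens(["a+1\n"], 0)
    first = tks[0]
    assert first.token_type == tokens.NAME  # was first[0] before this change
    assert first.value == 'a'               # was first[1]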
diff --git a/pypy/interpreter/pyparser/test/test_pytokenizer.py b/pypy/interpreter/pyparser/test/test_pytokenizer.py
--- a/pypy/interpreter/pyparser/test/test_pytokenizer.py
+++ b/pypy/interpreter/pyparser/test/test_pytokenizer.py
@@ -1,5 +1,6 @@
import pytest
from pypy.interpreter.pyparser import pytokenizer
+from pypy.interpreter.pyparser.parser import Token
from pypy.interpreter.pyparser.pygram import tokens
from pypy.interpreter.pyparser.error import TokenError
@@ -22,12 +23,12 @@
line = "a+1"
tks = tokenize(line)
assert tks == [
- (tokens.NAME, 'a', 1, 0, line),
- (tokens.PLUS, '+', 1, 1, line),
- (tokens.NUMBER, '1', 1, 2, line),
- (tokens.NEWLINE, '', 2, 0, '\n'),
- (tokens.NEWLINE, '', 2, 0, '\n'),
- (tokens.ENDMARKER, '', 2, 0, ''),
+ Token(tokens.NAME, 'a', 1, 0, line),
+ Token(tokens.PLUS, '+', 1, 1, line),
+ Token(tokens.NUMBER, '1', 1, 2, line),
+ Token(tokens.NEWLINE, '', 2, 0, '\n'),
+ Token(tokens.NEWLINE, '', 2, 0, '\n'),
+ Token(tokens.ENDMARKER, '', 2, 0, ''),
]
def test_error_parenthesis(self):