[Python-checkins] cpython (merge 3.4 -> 3.5): Issue #20387: Merge test and patch from 3.4.4
jason.coombs
python-checkins at python.org
Sun Jun 28 17:17:51 CEST 2015
https://hg.python.org/cpython/rev/98380a6e037c
changeset: 96713:98380a6e037c
branch: 3.5
parent: 96704:3039cb5b673c
parent: 96712:9ce5c1f371f7
user: Jason R. Coombs <jaraco at jaraco.com>
date: Sun Jun 28 11:13:30 2015 -0400
summary:
Issue #20387: Merge test and patch from 3.4.4
files:
Lib/test/test_tokenize.py | 21 ++++++++++++++++++++-
Lib/tokenize.py | 17 +++++++++++++++++
Misc/NEWS | 3 +++
3 files changed, 40 insertions(+), 1 deletions(-)
diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py
--- a/Lib/test/test_tokenize.py
+++ b/Lib/test/test_tokenize.py
@@ -5,6 +5,8 @@
code, print out a table with tokens. The ENDMARKER is omitted for
brevity.
+ >>> import glob
+
>>> dump_tokens("1 + 1")
ENCODING 'utf-8' (0, 0) (0, 0)
NUMBER '1' (1, 0) (1, 1)
@@ -835,7 +837,7 @@
open as tokenize_open, Untokenizer)
from io import BytesIO
from unittest import TestCase, mock
-import os, sys, glob
+import os
import token
def dump_tokens(s):
@@ -1427,6 +1429,22 @@
self.assertEqual(untokenize(iter(tokens)), b'Hello ')
+class TestRoundtrip(TestCase):
+ def roundtrip(self, code):
+ if isinstance(code, str):
+ code = code.encode('utf-8')
+ return untokenize(tokenize(BytesIO(code).readline)).decode('utf-8')
+
+ def test_indentation_semantics_retained(self):
+ """
+ Ensure that although whitespace might be mutated in a roundtrip,
+ the semantic meaning of the indentation remains consistent.
+ """
+ code = "if False:\n\tx=3\n\tx=3\n"
+ codelines = self.roundtrip(code).split('\n')
+ self.assertEqual(codelines[1], codelines[2])
+
+
__test__ = {"doctests" : doctests, 'decistmt': decistmt}
def test_main():
@@ -1437,6 +1455,7 @@
support.run_unittest(TestDetectEncoding)
support.run_unittest(TestTokenize)
support.run_unittest(UntokenizeTest)
+ support.run_unittest(TestRoundtrip)
if __name__ == "__main__":
test_main()
diff --git a/Lib/tokenize.py b/Lib/tokenize.py
--- a/Lib/tokenize.py
+++ b/Lib/tokenize.py
@@ -244,6 +244,8 @@
def untokenize(self, iterable):
it = iter(iterable)
+ indents = []
+ startline = False
for t in it:
if len(t) == 2:
self.compat(t, it)
@@ -254,6 +256,21 @@
continue
if tok_type == ENDMARKER:
break
+ if tok_type == INDENT:
+ indents.append(token)
+ continue
+ elif tok_type == DEDENT:
+ indents.pop()
+ self.prev_row, self.prev_col = end
+ continue
+ elif tok_type in (NEWLINE, NL):
+ startline = True
+ elif startline and indents:
+ indent = indents[-1]
+ if start[1] >= len(indent):
+ self.tokens.append(indent)
+ self.prev_col = len(indent)
+ startline = False
self.add_whitespace(start)
self.tokens.append(token)
self.prev_row, self.prev_col = end
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -25,6 +25,9 @@
Library
-------
+- Issue #20387: Restore semantic round-trip correctness in tokenize/untokenize
+ for tab-indented blocks.
+
- Issue #24456: Fixed possible buffer over-read in adpcm2lin() and lin2adpcm()
functions of the audioop module.
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins mailing list