[Python-checkins] r76250 - in sandbox/trunk/2to3/lib2to3: pgen2/tokenize.py tests/data/bom.py tests/test_parser.py tests/test_refactor.py
benjamin.peterson
python-checkins at python.org
Fri Nov 13 23:56:49 CET 2009
Author: benjamin.peterson
Date: Fri Nov 13 23:56:48 2009
New Revision: 76250
Log:
Fix handling of a UTF-8 BOM (issue #7313).
Added:
sandbox/trunk/2to3/lib2to3/tests/data/bom.py (contents, props changed)
Modified:
sandbox/trunk/2to3/lib2to3/pgen2/tokenize.py
sandbox/trunk/2to3/lib2to3/tests/test_parser.py
sandbox/trunk/2to3/lib2to3/tests/test_refactor.py
Modified: sandbox/trunk/2to3/lib2to3/pgen2/tokenize.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/pgen2/tokenize.py (original)
+++ sandbox/trunk/2to3/lib2to3/pgen2/tokenize.py Fri Nov 13 23:56:48 2009
@@ -281,9 +281,13 @@
# This behaviour mimics the Python interpreter
raise SyntaxError("unknown encoding: " + encoding)
- if bom_found and codec.name != 'utf-8':
- # This behaviour mimics the Python interpreter
- raise SyntaxError('encoding problem: utf-8')
+ if bom_found:
+ if codec.name != 'utf-8':
+ # This behaviour mimics the Python interpreter
+ raise SyntaxError('encoding problem: utf-8')
+ else:
+ # Allow it to be properly encoded and decoded.
+ encoding = 'utf-8-sig'
return encoding
first = read_or_stop()
Added: sandbox/trunk/2to3/lib2to3/tests/data/bom.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/2to3/lib2to3/tests/data/bom.py Fri Nov 13 23:56:48 2009
@@ -0,0 +1,3 @@
+# coding: utf-8
+print "BOM BOOM!"
+
Modified: sandbox/trunk/2to3/lib2to3/tests/test_parser.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/tests/test_parser.py (original)
+++ sandbox/trunk/2to3/lib2to3/tests/test_parser.py Fri Nov 13 23:56:48 2009
@@ -161,6 +161,7 @@
tree = driver.parse_string(source)
new = unicode(tree)
if diff(filepath, new, encoding):
+ import pdb; pdb.set_trace()
self.fail("Idempotency failed: %s" % filepath)
def test_extended_unpacking(self):
Modified: sandbox/trunk/2to3/lib2to3/tests/test_refactor.py
==============================================================================
--- sandbox/trunk/2to3/lib2to3/tests/test_refactor.py (original)
+++ sandbox/trunk/2to3/lib2to3/tests/test_refactor.py Fri Nov 13 23:56:48 2009
@@ -4,6 +4,7 @@
import sys
import os
+import codecs
import operator
import StringIO
import tempfile
@@ -177,10 +178,12 @@
try:
rt.refactor_file(test_file, True)
- self.assertNotEqual(old_contents, read_file())
+ new_contents = read_file()
+ self.assertNotEqual(old_contents, new_contents)
finally:
with open(test_file, "wb") as fp:
fp.write(old_contents)
+ return new_contents
def test_refactor_file(self):
test_file = os.path.join(FIXER_DIR, "parrot_example.py")
@@ -221,6 +224,11 @@
fn = os.path.join(TEST_DATA_DIR, "different_encoding.py")
self.check_file_refactoring(fn)
+ def test_bom(self):
+ fn = os.path.join(TEST_DATA_DIR, "bom.py")
+ data = self.check_file_refactoring(fn)
+ self.assertTrue(data.startswith(codecs.BOM_UTF8))
+
def test_crlf_newlines(self):
old_sep = os.linesep
os.linesep = "\r\n"
More information about the Python-checkins mailing list