[Python-checkins] r68730 - in python/branches/py3k/Lib/idlelib: IOBinding.py NEWS.txt ScriptBinding.py

martin.v.loewis python-checkins at python.org
Sun Jan 18 21:15:42 CET 2009


Author: martin.v.loewis
Date: Sun Jan 18 21:15:42 2009
New Revision: 68730

Log:
Issue #4008: Fix problems with non-ASCII source files.


Modified:
   python/branches/py3k/Lib/idlelib/IOBinding.py
   python/branches/py3k/Lib/idlelib/NEWS.txt
   python/branches/py3k/Lib/idlelib/ScriptBinding.py

Modified: python/branches/py3k/Lib/idlelib/IOBinding.py
==============================================================================
--- python/branches/py3k/Lib/idlelib/IOBinding.py	(original)
+++ python/branches/py3k/Lib/idlelib/IOBinding.py	Sun Jan 18 21:15:42 2009
@@ -74,10 +74,11 @@
     Raise a LookupError if the encoding is declared but unknown.
     """
     if isinstance(data, bytes):
-        try:
-            lines = data.decode('utf-8')
-        except UnicodeDecodeError:
-            return None
+        # This encoding might be wrong. However, the coding
+        # spec must be ASCII-only, so any non-ASCII characters
+        # around here will be ignored. Decoding to Latin-1 should
+        # never fail (except for memory outage)
+        lines = data.decode('iso-8859-1')
     else:
         lines = data
     # consider only the first two lines

Modified: python/branches/py3k/Lib/idlelib/NEWS.txt
==============================================================================
--- python/branches/py3k/Lib/idlelib/NEWS.txt	(original)
+++ python/branches/py3k/Lib/idlelib/NEWS.txt	Sun Jan 18 21:15:42 2009
@@ -3,6 +3,8 @@
 
 *Release date: XX-XXX-XXXX*
 
+- Issue #4008: Fix problems with non-ASCII source files.
+
 - Issue #4323: Always encode source as UTF-8 without asking
   the user (unless a different encoding is declared); remove
   user configuration of source encoding; all according to

Modified: python/branches/py3k/Lib/idlelib/ScriptBinding.py
==============================================================================
--- python/branches/py3k/Lib/idlelib/ScriptBinding.py	(original)
+++ python/branches/py3k/Lib/idlelib/ScriptBinding.py	Sun Jan 18 21:15:42 2009
@@ -24,7 +24,7 @@
 import tokenize
 import tkinter.messagebox as tkMessageBox
 from idlelib.EditorWindow import EditorWindow
-from idlelib import PyShell
+from idlelib import PyShell, IOBinding
 
 from idlelib.configHandler import idleConf
 
@@ -62,7 +62,13 @@
             return 'break'
 
     def tabnanny(self, filename):
-        f = open(filename, 'r')
+        # XXX: tabnanny should work on binary files as well
+        with open(filename, 'r', encoding='iso-8859-1') as f:
+            two_lines = f.readline() + f.readline()
+        encoding = IOBinding.coding_spec(two_lines)
+        if not encoding:
+            encoding = 'utf-8'
+        f = open(filename, 'r', encoding=encoding)
         try:
             tabnanny.process_tokens(tokenize.generate_tokens(f.readline))
         except tokenize.TokenError as msg:
@@ -82,14 +88,14 @@
         self.shell = shell = self.flist.open_shell()
         saved_stream = shell.get_warning_stream()
         shell.set_warning_stream(shell.stderr)
-        f = open(filename, 'r')
+        f = open(filename, 'rb')
         source = f.read()
         f.close()
-        if '\r' in source:
-            source = re.sub(r"\r\n", "\n", source)
-            source = re.sub(r"\r", "\n", source)
-        if source and source[-1] != '\n':
-            source = source + '\n'
+        if b'\r' in source:
+            source = source.replace(b'\r\n', b'\n')
+            source = source.replace(b'\r', b'\n')
+        if source and source[-1] != ord(b'\n'):
+            source = source + b'\n'
         editwin = self.editwin
         text = editwin.text
         text.tag_remove("ERROR", "1.0", "end")


More information about the Python-checkins mailing list