[Python-checkins] bpo-30109: Fix reindent.py for non-ASCII files. (#5637)

Serhiy Storchaka webhook-mailer at python.org
Mon Feb 12 13:16:46 EST 2018


https://github.com/python/cpython/commit/17cec70a38b297779b8fd3f081fb041d45ae1dff
commit: 17cec70a38b297779b8fd3f081fb041d45ae1dff
branch: 2.7
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018-02-12T20:16:42+02:00
summary:

bpo-30109: Fix reindent.py for non-ASCII files. (#5637)

It now processes files as binary streams.

This also fixes "make reindent".

files:
A Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst
M Lib/email/utils.py
M Tools/scripts/reindent.py

diff --git a/Lib/email/utils.py b/Lib/email/utils.py
index a74db42286e7..5b22521e5814 100644
--- a/Lib/email/utils.py
+++ b/Lib/email/utils.py
@@ -213,7 +213,7 @@ def parsedate_tz(data):
 def parseaddr(addr):
     """
     Parse addr into its constituent realname and email address parts.
-    
+
     Return a tuple of realname and email address, unless the parse fails, in
     which case return a 2-tuple of ('', '').
     """
diff --git a/Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst b/Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst
new file mode 100644
index 000000000000..89249e171879
--- /dev/null
+++ b/Misc/NEWS.d/next/Tools-Demos/2018-02-12-14-27-01.bpo-30109.lIYlaf.rst
@@ -0,0 +1,2 @@
+Fixed Tools/scripts/reindent.py for non-ASCII files. It now processes files
+as binary streams. This also fixes "make reindent".
diff --git a/Tools/scripts/reindent.py b/Tools/scripts/reindent.py
index df15edbc8d65..540901ba7d70 100755
--- a/Tools/scripts/reindent.py
+++ b/Tools/scripts/reindent.py
@@ -109,7 +109,7 @@ def check(file):
     if verbose:
         print "checking", file, "...",
     try:
-        f = io.open(file)
+        f = open(file, "rb")
     except IOError, msg:
         errprint("%s: I/O Error: %s" % (file, str(msg)))
         return
@@ -133,7 +133,7 @@ def check(file):
                 shutil.copyfile(file, bak)
                 if verbose:
                     print "backed up", file, "to", bak
-            f = io.open(file, "w", newline=newline)
+            f = open(file, "wb")
             r.write(f)
             f.close()
             if verbose:
@@ -144,7 +144,21 @@ def check(file):
             print "unchanged."
         return False
 
-def _rstrip(line, JUNK='\n \t'):
+def _detect_newlines(lines):
+    newlines = {'\r\n' if line[-2:] == '\r\n' else
+                '\n' if line[-1:] == '\n' else
+                '\r' if line[-1:] == '\r' else
+                ''
+                for line in lines}
+    newlines.discard('')
+    newlines = tuple(sorted(newlines))
+    if not newlines:
+        return '\n'
+    if len(newlines) == 1:
+        return newlines[0]
+    return newlines
+
+def _rstrip(line, JUNK='\r\n \t'):
     """Return line stripped of trailing spaces, tabs, newlines.
 
     Note that line.rstrip() instead also strips sundry control characters,
@@ -166,10 +180,18 @@ def __init__(self, f):
         # Raw file lines.
         self.raw = f.readlines()
 
+        # Save the newlines found in the file so they can be used to
+        #  create output without mutating the newlines.
+        self.newlines = _detect_newlines(self.raw)
+        if isinstance(self.newlines, tuple):
+            self.newline = self.newlines[0]
+        else:
+            self.newline = self.newlines
+
         # File lines, rstripped & tab-expanded.  Dummy at start is so
         # that we can use tokenize's 1-based line numbering easily.
-        # Note that a line is all-blank iff it's "\n".
-        self.lines = [_rstrip(line).expandtabs() + "\n"
+        # Note that a line is all-blank iff it's newline.
+        self.lines = [_rstrip(line).expandtabs() + self.newline
                       for line in self.raw]
         self.lines.insert(0, None)
         self.index = 1  # index into self.lines of next line
@@ -180,15 +202,11 @@ def __init__(self, f):
         # indeed, they're our headache!
         self.stats = []
 
-        # Save the newlines found in the file so they can be used to
-        #  create output without mutating the newlines.
-        self.newlines = f.newlines
-
     def run(self):
         tokenize.tokenize(self.getline, self.tokeneater)
         # Remove trailing empty lines.
         lines = self.lines
-        while lines and lines[-1] == "\n":
+        while lines and lines[-1] == self.newline:
             lines.pop()
         # Sentinel.
         stats = self.stats
@@ -244,7 +262,7 @@ def run(self):
             else:
                 for line in lines[thisstmt:nextstmt]:
                     if diff > 0:
-                        if line == "\n":
+                        if line == self.newline:
                             after.append(line)
                         else:
                             after.append(" " * diff + line)



More information about the Python-checkins mailing list