[Python-checkins] r80768 - in sandbox/trunk/untabify: untabify.py

antoine.pitrou python-checkins at python.org
Tue May 4 22:17:35 CEST 2010


Author: antoine.pitrou
Date: Tue May  4 22:17:35 2010
New Revision: 80768

Log:
A script un-tab-ifying C files



Added:
   sandbox/trunk/untabify/
   sandbox/trunk/untabify/untabify.py   (contents, props changed)

Added: sandbox/trunk/untabify/untabify.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/untabify/untabify.py	Tue May  4 22:17:35 2010
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+
+"""
+Untabify C files, converting them from tab indent to 4-spaces indent.
+Tries to take care of continuation lines, as well as vertical alignment.
+Gives sufficiently good results on most files of the source tree; visual
+inspection is still recommended to fix possible cosmetic breakage.
+
+Notes:
+
+- only whitespace is added/removed, therefore compilation can't be
+  broken.
+- only run on files really using tab indent; this shouldn't be run on files
+  already using 4-spaces indent (use -l option to list files using tab indent)
+"""
+
+import os
+import re
+import sys
+import tempfile
+import optparse
+import itertools
+
+
+def untabify(lines, write):
+    last_indent = 0
+    last_outdent = 0
+    last_outline = ""
+    for line in lines:
+        cr = line.find('\r')
+        lf = line.find('\r')
+        if lf == -1:
+            eolpos = cr
+        elif cr == -1:
+            eolpos = lf
+        else:
+            assert cr != -1 and lf != -1
+            eolpos = min(cr, lf)
+        eol = line[eolpos:]
+        line = line.rstrip()
+        chunks = line.split("\t")
+        output = []
+        outpos = 0
+        inpos = 0
+        # Count leading spaces
+        for n, c in enumerate(chunks):
+            if not c:
+                inpos = (inpos // 8 + 1) * 8
+                outpos += 4
+                continue
+            if not c.startswith(' '):
+                break
+            chunks[n] = c.lstrip(' ')
+            k = len(c) - len(chunks[n])
+            inpos += k
+            outpos += k
+            if chunks[n]:
+                break
+            inpos = (inpos // 8 + 1) * 8
+            outpos += 4
+        # Continuation line?
+        if (inpos > last_indent + 8
+            # labels and end-of-comments can't be continued
+            and not last_outline.endswith(':')
+            and not last_outline.endswith('*/')):
+            outpos = inpos + (last_outdent - last_indent)
+            #print "--- Continuation line inpos=%d outpos=%d" % (inpos, outpos)
+            indent = last_indent
+            outdent = last_outdent
+        else:
+            outpos = (inpos // 8) * 4 + (inpos % 8)
+            indent = inpos
+            outdent = outpos
+        output.append(' ' * outpos)
+        # Process rest of the line, fixing position of internal tabs
+        for c in chunks[n:-1]:
+            output.append(c)
+            outpos += len(c)
+            inpos += len(c)
+            inpos = (inpos // 8 + 1) * 8
+            output.append((inpos - outpos) * " ")
+            outpos = inpos
+        output.append(chunks[-1])
+        outline = "".join(output).rstrip()
+        if outline and not outline.startswith('#'):
+            # Non-empty, non-preprocessor line:
+            # remember indentation for detection of continuation lines
+            last_indent = indent
+            last_outdent = outdent
+            last_outline = outline
+        write(outline)
+        write(eol)
+
+
+def needs_untabifying(filepath):
+    f = open(filepath, "rb")
+    try:
+        s = f.read(65536)
+        # Heuristic: we don't want a couple of lone tabs to show as false
+        # positives
+        return s.count("\n\t") + s.count("\r\t") > 5
+    finally:
+        f.close()
+
+def walk_c_files(paths):
+    for p in paths:
+        if os.path.isfile(p):
+            yield p
+        for dirpath, dirnames, filenames in os.walk(p):
+            for fn in sorted(filenames):
+                if fn.endswith('.h') or fn.endswith('.c'):
+                    yield os.path.join(dirpath, fn)
+
+def main():
+    parser = optparse.OptionParser()
+    parser.add_option("-l", "--list", dest="do_list_true",
+                      action="store_true", default=False,
+                      help="list files needing untabifying")
+    parser.add_option("-n", "--list-neg", dest="do_list_false",
+                      action="store_true", default=False,
+                      help="list files *not* needing untabifying")
+    parser.add_option("-u", "--untabify", dest="do_untab",
+                      action="store_true", default=False,
+                      help="untabify stdin to stdout")
+    parser.add_option("-b", "--batch", dest="do_batch",
+                      action="store_true", default=False,
+                      help="untabify specified files and dirs")
+    options, args = parser.parse_args()
+    if (options.do_list_true + options.do_list_false +
+        options.do_untab + options.do_batch != 1):
+        parser.error("you must specify exactly one of -l, -n, -b and -u")
+
+    if options.do_list_true or options.do_list_false:
+        if not args:
+            parser.error("-l and -n need a directory path")
+        _filter = (itertools.ifilter if options.do_list_true else 
+                   itertools.ifilterfalse)
+        for cfn in _filter(needs_untabifying,
+                           walk_c_files(args)):
+            print cfn
+        return
+
+    if options.do_batch:
+        if not args:
+            parser.error("-b needs a file or directory path")
+        for cfn in itertools.ifilter(needs_untabifying,
+                                     walk_c_files(args)):
+            fd, tmpfn = tempfile.mkstemp(dir='.')
+            try:
+                fin = open(cfn, "rb")
+                fout = os.fdopen(fd, "wb")
+                print "Untabifing %s" % cfn
+                try:
+                    untabify(fin, fout.write)
+                finally:
+                    fin.close()
+                    fout.close()
+                os.rename(tmpfn, cfn)
+            except:
+                os.unlink(tmpfn)
+
+    if options.do_untab:
+        untabify(sys.stdin, sys.stdout.write)
+
+if __name__ == '__main__':
+    main()
+


More information about the Python-checkins mailing list