[Python-checkins] r80768 - in sandbox/trunk/untabify: untabify.py
antoine.pitrou
python-checkins at python.org
Tue May 4 22:17:35 CEST 2010
Author: antoine.pitrou
Date: Tue May 4 22:17:35 2010
New Revision: 80768
Log:
A script un-tab-ifying C files
Added:
sandbox/trunk/untabify/
sandbox/trunk/untabify/untabify.py (contents, props changed)
Added: sandbox/trunk/untabify/untabify.py
==============================================================================
--- (empty file)
+++ sandbox/trunk/untabify/untabify.py Tue May 4 22:17:35 2010
@@ -0,0 +1,167 @@
+#!/usr/bin/env python
+
+"""
+Untabify C files, converting them from tab indent to 4-spaces indent.
+Tries to take care of continuation lines, as well as vertical alignment.
+Gives sufficiently good results on most files of the source tree; visual
+inspection is still recommended to fix possible cosmetic breakage.
+
+Notes:
+
+- only whitespace is added/removed, therefore compilation can't be
+ broken.
+- only run on files really using tab indent; this shouldn't be run on files
+ already using 4-spaces indent (use -l option to list files using tab indent)
+"""
+
+import os
+import re
+import sys
+import tempfile
+import optparse
+import itertools
+
+
+def untabify(lines, write):
+ last_indent = 0
+ last_outdent = 0
+ last_outline = ""
+ for line in lines:
+ cr = line.find('\r')
+ lf = line.find('\r')
+ if lf == -1:
+ eolpos = cr
+ elif cr == -1:
+ eolpos = lf
+ else:
+ assert cr != -1 and lf != -1
+ eolpos = min(cr, lf)
+ eol = line[eolpos:]
+ line = line.rstrip()
+ chunks = line.split("\t")
+ output = []
+ outpos = 0
+ inpos = 0
+ # Count leading spaces
+ for n, c in enumerate(chunks):
+ if not c:
+ inpos = (inpos // 8 + 1) * 8
+ outpos += 4
+ continue
+ if not c.startswith(' '):
+ break
+ chunks[n] = c.lstrip(' ')
+ k = len(c) - len(chunks[n])
+ inpos += k
+ outpos += k
+ if chunks[n]:
+ break
+ inpos = (inpos // 8 + 1) * 8
+ outpos += 4
+ # Continuation line?
+ if (inpos > last_indent + 8
+ # labels and end-of-comments can't be continued
+ and not last_outline.endswith(':')
+ and not last_outline.endswith('*/')):
+ outpos = inpos + (last_outdent - last_indent)
+ #print "--- Continuation line inpos=%d outpos=%d" % (inpos, outpos)
+ indent = last_indent
+ outdent = last_outdent
+ else:
+ outpos = (inpos // 8) * 4 + (inpos % 8)
+ indent = inpos
+ outdent = outpos
+ output.append(' ' * outpos)
+ # Process rest of the line, fixing position of internal tabs
+ for c in chunks[n:-1]:
+ output.append(c)
+ outpos += len(c)
+ inpos += len(c)
+ inpos = (inpos // 8 + 1) * 8
+ output.append((inpos - outpos) * " ")
+ outpos = inpos
+ output.append(chunks[-1])
+ outline = "".join(output).rstrip()
+ if outline and not outline.startswith('#'):
+ # Non-empty, non-preprocessor line:
+ # remember indentation for detection of continuation lines
+ last_indent = indent
+ last_outdent = outdent
+ last_outline = outline
+ write(outline)
+ write(eol)
+
+
+def needs_untabifying(filepath):
+ f = open(filepath, "rb")
+ try:
+ s = f.read(65536)
+ # Heuristic: we don't want a couple of lone tabs to show as false
+ # positives
+ return s.count("\n\t") + s.count("\r\t") > 5
+ finally:
+ f.close()
+
+def walk_c_files(paths):
+ for p in paths:
+ if os.path.isfile(p):
+ yield p
+ for dirpath, dirnames, filenames in os.walk(p):
+ for fn in sorted(filenames):
+ if fn.endswith('.h') or fn.endswith('.c'):
+ yield os.path.join(dirpath, fn)
+
+def main():
+ parser = optparse.OptionParser()
+ parser.add_option("-l", "--list", dest="do_list_true",
+ action="store_true", default=False,
+ help="list files needing untabifying")
+ parser.add_option("-n", "--list-neg", dest="do_list_false",
+ action="store_true", default=False,
+ help="list files *not* needing untabifying")
+ parser.add_option("-u", "--untabify", dest="do_untab",
+ action="store_true", default=False,
+ help="untabify stdin to stdout")
+ parser.add_option("-b", "--batch", dest="do_batch",
+ action="store_true", default=False,
+ help="untabify specified files and dirs")
+ options, args = parser.parse_args()
+ if (options.do_list_true + options.do_list_false +
+ options.do_untab + options.do_batch != 1):
+ parser.error("you must specify exactly one of -l, -n, -b and -u")
+
+ if options.do_list_true or options.do_list_false:
+ if not args:
+ parser.error("-l and -n need a directory path")
+ _filter = (itertools.ifilter if options.do_list_true else
+ itertools.ifilterfalse)
+ for cfn in _filter(needs_untabifying,
+ walk_c_files(args)):
+ print cfn
+ return
+
+ if options.do_batch:
+ if not args:
+ parser.error("-b needs a file or directory path")
+ for cfn in itertools.ifilter(needs_untabifying,
+ walk_c_files(args)):
+ fd, tmpfn = tempfile.mkstemp(dir='.')
+ try:
+ fin = open(cfn, "rb")
+ fout = os.fdopen(fd, "wb")
+ print "Untabifing %s" % cfn
+ try:
+ untabify(fin, fout.write)
+ finally:
+ fin.close()
+ fout.close()
+ os.rename(tmpfn, cfn)
+ except:
+ os.unlink(tmpfn)
+
+ if options.do_untab:
+ untabify(sys.stdin, sys.stdout.write)
+
+# Run the command-line interface only when executed as a script.
+if __name__ == '__main__':
+ main()
+
More information about the Python-checkins
mailing list