Problem with tokenize module and indents
Simon Forman
rogue_pedro at yahoo.com
Wed Aug 23 17:46:10 EDT 2006
Tim wrote:
> I ran into a problem with a script I was playing with to check code
> indents and need some direction. The problem seems to occur when the
> editor's tab size is set to 4 and space and tab indents are mixed on
> consecutive lines. It works fine when the editor's tab size is 8,
> regardless of whether the indents are mixed.
>
> Below is how the 3 test files are laid out, along with the sample code
> and the output I get.
> Any help on how to detect this correctly would be appreciated.
>
>
> # nano -T4 tabspacing_4.py
> class Test:
> """triple quote""" #indent is 1 tab
> def __init__(self, msg): #indent is 4 spaces << this gets reported as a dedent when there is no change in indent level
> self.msg = msg #indent is 2 tabs
>
> #nano -T8 tabspacing_8A.py
> class Test:
> """triple quote""" #indent is 1 tab
> def __init__(self, msg): #indent is 8 spaces << no indent change reported
> self.msg = msg #indent is 1 tab + 4 spaces
>
> #nano -T8 tabspacing_8B.py
> class Test:
> """triple quote""" #indent is 1 tab
> def __init__(self, msg): #indent is 1 tab << no indent change reported
> self.msg = msg #indent is 1 tab + 4 spaces
>
>
>
> My script
>
> #!/usr/bin/env python
>
> import tokenize
> from sys import argv
>
> indent_lvl = 0
> line_number = 0
> lines = file(argv[1]).readlines()
> done = False
>
> def parse():
>
> def feed():
>
> global line_number, lines
>
> if line_number < len(lines):
> txt = lines[line_number]
> line_number += 1
> else:
> txt = ''
>
> return txt
>
> def indents(type, token, start, end, line):
>
> global indent_lvl, done
>
> if type == tokenize.DEDENT:
> indent_lvl -= 1
> elif type == tokenize.INDENT:
> indent_lvl += 1
> elif type == tokenize.ENDMARKER:
> done = True
> return
> else:
> return
>
> print "token=%s, line_number=%i, indent_lvl=%i" %
> (tokenize.tok_name[type], start[0], indent_lvl), line.strip()
>
> while not done:
> tokenize.tokenize(feed, indents)
>
> parse()
>
>
> $ ./sample.py tabspacing_4.py
> token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
> #indent is 1 tab
> token=DEDENT, line_number=4, indent_lvl=0 def __init__(self, msg):
> #indent is 4 spaces <-- PROBLEM HERE
> token=INDENT, line_number=5, indent_lvl=1 self.msg = msg
> #indent is 2 tabs
> token=DEDENT, line_number=8, indent_lvl=0
>
> $ ./sample.py tabspacing_8A.py
> token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
> #indent is 1 tab
> token=INDENT, line_number=5, indent_lvl=2 self.msg = msg
> #indent is 1 tab + 4 spaces
> token=DEDENT, line_number=8, indent_lvl=1
> token=DEDENT, line_number=8, indent_lvl=0
>
> $ ./sample.py tabspacing_8B.py
> token=INDENT, line_number=3, indent_lvl=1 """triple quote"""
> #indent is 1 tab
> token=INDENT, line_number=5, indent_lvl=2 self.msg = msg
> #indent is 1 tab + 4 spaces
> token=DEDENT, line_number=8, indent_lvl=1
> token=DEDENT, line_number=8, indent_lvl=0
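Well, the simple answer is "Don't mix tabs and spaces." The tokenize
module (like the interpreter itself) always treats a tab as advancing to
the next multiple of 8 columns, no matter what tab size your editor
displays, so in tabspacing_4.py the 4-space line really is less indented
than the 1-tab line above it, hence the DEDENT. But if that's
unhelpful ;-), check out the tabnanny script (now in the standard
library) and also the expandtabs() method of strings.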
http://docs.python.org/lib/module-tabnanny.html
Peace,
~Simon
More information about the Python-list
mailing list