Strip white spaces from source

Richie Hindle richie at entrian.com
Mon May 9 06:47:24 EDT 2005


[qwweeeit]
> I need to limit as much as possible the length of a source line,
> stripping white spaces (except indentation).
> For example:
> .   .   max_move and AC_RowStack.acceptsCards ( self, from_stack, cards
> )
> must be reduced to:
> .   .   max_move and AC_RowStack.acceptsCards(self,from_stack,cards)

Here's a script that does some of what you want (stripping whitespace within
the three types of brackets).  It was written to make code more compliant with
the Python style guide.

------------------------------- unspace.py -------------------------------

"""Strips spaces from inside brackets in Python source code, turning
( this ) into (this) and [ 1, ( 2, 3 ) ] into [1, (2, 3)].  This makes
the code more compliant with the Python style guide.  Usage:

  unspace.py filename

Output goes to stdout.

This file is deliberately written with lots of spaces within brackets,
so you can use it as test input.
"""

import sys, re, token, tokenize

# Bracket tokens: whitespace directly after an OPEN token or directly
# before a CLOSE token is what gets stripped.
OPEN = [ '(', '[', '{' ]
CLOSE = [ ')', ']', '}' ]

class UnSpace:
    """Holds the state of the process; onToken is a tokenize-style token
    callback taking ( type, string, start, end, line ).

    Every token passed to onToken is written to sys.stdout straight away,
    together with the whitespace that preceded it on its line, except
    whitespace that directly follows an opening bracket or directly
    precedes a closing bracket.
    """
    def __init__( self ):
        # These three are not updated by any method of this class; flush()
        # writes 'line' only if something else has set it.
        self.line = None     # The text of the current line.
        self.number = -1     # The line number of the current line.
        self.deleted = 0     # How many spaces have been deleted from 'line'.

        # Properties of the previous token.  Row 0 never matches a real
        # token row, so the very first token is treated as starting a line.
        self.last_srow = 0
        self.last_scol = 0
        self.last_erow = 0
        self.last_ecol = 0
        self.last_line = ''
        self.last_type = None
        self.last_tok = ''

    def onToken( self, type, tok, start, end, line ):
        """Write one token to sys.stdout, preceded by the whitespace to keep.

        type  -- token type, compared against token.OP.
        tok   -- the token's text.
        start -- ( row, column ) where the token starts.
        end   -- ( row, column ) where the token ends.
        line  -- text of the line the token is on; columns index into it.
        """
        # Unpacked here rather than in the signature: tuple parameters are
        # a syntax error on Python 3, while positional callers are unaffected.
        srow, scol = start
        erow, ecol = end

        if self.last_erow != srow:
            # Print trailing backslashes plus the indent for new lines.
            match = re.search( r'(\s+\\\n)$', self.last_line )
            if match:
                sys.stdout.write( match.group( 1 ) )
            sys.stdout.write( line[ :scol ] )
        else:
            # Print intertoken whitespace except the stuff to strip.  This
            # branch is chosen by the row where the previous token *ended*,
            # so the gap after a multi-line token is kept as well.
            afterOpen = self.last_type == token.OP and self.last_tok in OPEN
            beforeClose = type == token.OP and tok in CLOSE
            if not ( afterOpen or beforeClose ):
                sys.stdout.write( line[ self.last_ecol:scol ] )

        # Print the token itself.
        sys.stdout.write( tok )

        # Remember the properties of this token.
        self.last_srow, self.last_scol = ( srow, scol )
        self.last_erow, self.last_ecol = ( erow, ecol )
        self.last_type, self.last_tok = type, tok
        self.last_line = line

    def flush( self ):
        """Write self.line to sys.stdout if it has been set."""
        if self.line is not None:
            sys.stdout.write( self.line )


# Script entry point: unspace the file named on the command line, writing
# the result to stdout, or print the usage text when the argument count
# is wrong.
if __name__ == '__main__':
    if len( sys.argv ) != 2:
        # Parenthesised so the same line works on Python 2 and Python 3.
        print( __doc__ )
    else:
        unSpace = UnSpace()
        # 'with' closes the input file even if tokenizing raises.
        with open( sys.argv[ 1 ], 'rt' ) as source:
            # generate_tokens yields ( type, string, start, end, line )
            # 5-tuples, which are exactly onToken's arguments.  Unlike the
            # callback form of tokenize.tokenize, it exists on both
            # Python 2 and Python 3.
            for tokenInfo in tokenize.generate_tokens( source.readline ):
                unSpace.onToken( *tokenInfo )
        unSpace.flush()

-- 
Richie Hindle
richie at entrian.com




More information about the Python-list mailing list