Replace string except inside quotes?

M.E.Farmer mefjr75 at hotmail.com
Sat Dec 4 03:13:00 EST 2004


"Raymond Hettinger" <vze4rx4y at verizon.net> wrote in message 
> The source for the tokenize module covers all these bases.

> Raymond Hettinger

# tokenize text replace

import keyword, os, sys, traceback
import string, cStringIO
import token, tokenize

######################################################################

class Parser:
    """Python source code tokenizing text replacer.

    Tokenizes a piece of Python source and writes it back out, swapping
    the text of every token that has a given type and exactly matches a
    search string.  Because matching is done per token, a name is never
    replaced inside a string literal or a comment (and vice versa).

    An instance is also the token handler: calling it with the five
    fields of a token (type, text, start, end, line) emits that token.
    """
    def __init__(self, raw, out=sys.stdout):
        """Store the source text and the output stream.

        raw -- Python source text; tabs are expanded and leading and
               trailing whitespace is stripped before tokenizing.
        out -- object with a write() method that receives the result.
        """
        self.raw = raw.expandtabs().strip()
        self.out = out

    def format(self, search='', replace='',
               replacetokentype=token.NAME):
        """Tokenize the stored source and write the result to self.out.

        search           -- exact token text to look for.
        replace          -- text written in place of each match.
        replacetokentype -- token type a token must have to be
                            replaced (token.NAME by default; use
                            token.STRING to replace string literals).

        A tokenize.TokenError (e.g. an unterminated multi-line
        construct) is not propagated: its traceback is printed and the
        tokens handled up to that point have already been written.
        """
        # cStringIO only exists on Python 2; io.StringIO replaces it on 3.
        try:
            from cStringIO import StringIO
        except ImportError:
            from io import StringIO

        self.searchtext = search
        self.replacetext = replace
        self.replacetokentype = replacetokentype

        # self.lines[row] is the offset in self.raw at which 1-based row
        # `row` starts.  Index 0 is padding, and the final entry
        # (len(self.raw)) serves the row reported for the end marker.
        self.lines = [0, 0]
        pos = self.raw.find('\n') + 1
        while pos:
            self.lines.append(pos)
            pos = self.raw.find('\n', pos) + 1
        self.lines.append(len(self.raw))

        # Offset in self.raw just past the last token handled.
        self.pos = 0
        readline = StringIO(self.raw).readline

        # Feed every token to __call__.  generate_tokens() is used
        # because it accepts a text readline on Python 2 and Python 3.
        try:
            for tok in tokenize.generate_tokens(readline):
                self(*tok)
        except tokenize.TokenError:
            traceback.print_exc()

    def __call__(self, toktype, toktext, start, end, line):
        """Token handler: write one token, replacing it if it matches.

        toktype -- token type constant from token / tokenize.
        toktext -- text of the token.
        start   -- (row, column) at which the token begins.
        end     -- (row, column) at which the token ends (unused).
        line    -- physical source line of the token (unused).
        """
        srow, scol = start

        # calculate new positions
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)

        # handle newlines
        if toktype in (token.NEWLINE, tokenize.NL):
            # An empty newline token is synthesised by the tokenizer at
            # the end of input; it has no counterpart in the source, so
            # nothing is written for it.
            if toktext:
                self.out.write('\n')
            return

        # send the original whitespace, if needed
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])

        # skip indenting tokens; rewinding self.pos makes the indentation
        # get copied as ordinary whitespace in front of the next token
        if toktype in (token.INDENT, token.DEDENT):
            self.pos = newpos
            return

        # search for matches to our searchtext
        # customize this for your exact needs
        if (toktype == self.replacetokentype and
                toktext == self.searchtext):
            toktext = self.replacetext

        # write it out
        self.out.write(toktext)

######################################################################
# just an example
def Main():
    """Example run: rewrite this script's own source to stdout.

    Reads the file named by sys.argv[0] and prints it with every NAME
    token 'tokenize' replaced by 'MyNewName'.  Does nothing when
    sys.argv[0] is empty.
    """
    import sys
    script = sys.argv[0]
    if script:
        # Close the file as soon as it has been read.
        with open(script) as source:
            filein = source.read()
        Parser(filein, out=sys.stdout).format('tokenize', 'MyNewName')

######################################################################

# Run the example only when executed as a script, not when imported.
if __name__ == '__main__':
    Main()

# end of code


This is an example of how to use tokenize to replace names
that match a search string.
If you want to replace only strings and not names, change
replacetokentype to token.STRING instead of token.NAME, etc.
HTH,
 M.E.Farmer



More information about the Python-list mailing list