remove strings from source

M.E.Farmer mefjr75 at hotmail.com
Sat Feb 26 16:23:27 EST 2005


qwweeeit wrote:
> For a python code I am writing I need to remove all strings
> definitions from source and substitute them with a place-holder.
>
> To make clearer:
> line 45  sVar="this is the string assigned to sVar"
> must be converted in:
> line 45 sVar=s00001
>
> Such substitution is recorded in a file under:
> s0001[line 45]="this is the string assigned to sVar"
>
> For curious guys:
> I am trying to implement a cross variable reference tool and the
> variability (in length) of the string definitions (especially if
> multi-line) can cause display problems.
>
> I need your help in correctly identifying the strings (also embedding
> the r'xx..' or u'yy...' as part of the string definition). The problem
> is mainly on the multi-line definitions or in cached strings
> (embedding chr() definitions or escape sequences).

Hello,
I have written a few python parsers before.
Here is my attempt :)
# string_mapper.py
from __future__ import generators# python 2.2
import keyword, os, sys, traceback
import cStringIO, token, tokenize

def StringNamer(num=0):
    '''Generate an endless series of placeholder names.

    Each name is the letter 's' followed by a counter zero-filled to
    six characters.  The counter is incremented before every yield,
    so with the default num=0 the first name is 's000001'.
    '''
    counter = num
    while 1:
        counter += 1
        yield 's' + str(counter).zfill(6)

class ReplaceParser(object):
    """
>>> filein = open('yourfilehere.py').read()
>>> replacer = ReplaceParser(filein, out=sys.stdout)
>>> replacer.format()
>>> replacer.StringMap
    """

    def __init__(self, raw, out=sys.stdout):
        ''' Store the source text.
        '''
        self.raw =raw.expandtabs().strip()
        self.out = out
        self.StringName = StringNamer()
        self.StringMap = {}

    def format(self):
        ''' Parse and send the source.
        '''
        self.lines = [0, 0]
        pos = 0
        self.temp = cStringIO.StringIO()
        while 1:
            pos = self.raw.find('\n', pos) + 1
            if not pos: break
            self.lines.append(pos)
        self.lines.append(len(self.raw))
        self.pos = 0
        text = cStringIO.StringIO(self.raw)
        try:
            tokenize.tokenize(text.readline, self)
        except tokenize.TokenError, ex:
            traceback.print_exc()

    def __call__(self, toktype, toktext, (srow,scol),
                   (erow,ecol), line):
        ''' Token handler.
        '''
        oldpos = self.pos
        newpos = self.lines[srow] + scol
        self.pos = newpos + len(toktext)
        if toktype in [token.NEWLINE, tokenize.NL]:
            self.out.write('\n')
            return
        if newpos > oldpos:
            self.out.write(self.raw[oldpos:newpos])
        if toktype in [token.INDENT, token.DEDENT]:
            self.pos = newpos
            return
        if (toktype == token.STRING):
            sname = self.StringName.next()
            self.StringMap[sname] = toktext
            toktext = sname
        self.out.write(toktext)
        self.out.flush()
        return

hth,
M.E.Farmer




More information about the Python-list mailing list