substitution

Peter Otten __peter__ at web.de
Mon Jan 18 08:31:50 EST 2010


Iain King wrote:

> Not sure if it's the most pythonic, but I'd probably do it like this:
> 
> def token_replace(string, subs):
>         subs = dict(subs)
>         tokens = {}
>         for i, sub in enumerate(subs):
>                 tokens[sub] = i
>                 tokens[i] = sub
>         current = [string]
>         for sub in subs:
>                 new = []
>                 for piece in current:
>                         if type(piece) == str:
>                                 chunks = piece.split(sub)
>                                 new.append(chunks[0])
>                                 for chunk in chunks[1:]:
>                                         new.append(tokens[sub])
>                                         new.append(chunk)
>                         else:
>                                 new.append(piece)
>                 current = new
>         output = []
>         for piece in current:
>                 if type(piece) == str:
>                         output.append(piece)
>                 else:
>                         output.append(subs[tokens[piece]])
>         return ''.join(output)
> 
> >>> token_replace("fooxxxbazyyyquuux", [("quuux", "foo"), ("foo", "bar"),
> ...               ("baz", "quux")])
> 'barxxxquuxyyyfoo'
> 
> I'm sure someone could whittle that down to a handful of list comps...

I tried, but failed:

def join(chunks, separator):
    """Yield the items of *chunks* with *separator* between neighbours.

    Works like ``str.join`` but lazily and for arbitrary objects: the
    separator is emitted between consecutive items, never before the first
    or after the last.

    Args:
        chunks: Any iterable of items.
        separator: Object to yield between each pair of adjacent items.

    Yields:
        The items of *chunks*, interleaved with *separator*. Nothing is
        yielded when *chunks* is empty.
    """
    chunks = iter(chunks)
    try:
        first = next(chunks)
    except StopIteration:
        # A bare next() would let StopIteration escape the generator body,
        # which Python 3.7+ (PEP 479) converts into a RuntimeError. An empty
        # input should simply produce an empty output.
        return
    yield first
    for chunk in chunks:
        yield separator
        yield chunk

def token_replace(string, subs):
    """Apply all (find, replace) pairs of *subs* to *string* simultaneously.

    The text is kept as a list of pieces. For each pair, every piece that is
    still plain text is split on ``find`` and the pair's integer index is
    inserted as a placeholder where ``find`` occurred. Placeholders are only
    swapped for their replacement text in the final join, so inserted
    replacement text is never searched by later pairs. Pairs are processed
    in the order given, so an earlier pair claims its matches first.

    Args:
        string: The text to transform.
        subs: Iterable of ``(find, replace)`` pairs.

    Returns:
        The text with every claimed occurrence replaced.
    """
    replacements = {}

    pieces = [string]
    for index, (find, replace) in enumerate(subs):
        replacements[index] = replace
        split_pieces = []
        for piece in pieces:
            if piece not in replacements:
                # Still plain text: mark each occurrence of `find` with
                # this pair's placeholder index.
                split_pieces.extend(join(piece.split(find), index))
            else:
                # Placeholder left by an earlier pair; pass it through.
                split_pieces.append(piece)
        pieces = split_pieces

    return ''.join(
        replacements.get(piece, piece) for piece in pieces
    )

You could replace the inner loop with sum(..., []), but that would be really 
ugly.

Peter



More information about the Python-list mailing list