how to write a text file search & replace script
Simon Brunning
SBrunning at trisystems.co.uk
Tue Mar 27 03:23:43 EST 2001
The script below might be of some use. To use it, set up a text file with
all your from values and to values in it (one pair per line, tab separated),
and run it over your files.
#!/usr/bin/env python
# Module : multirep.py
# Synopsis : Multiple replacements
# Programmer : Simon Brunning - sbrunning at bigfoot.com
# Date : 20/09/2000
# Notes : Thanks to Fredrik Lundh for the MultiReplace class, which I
#         have butchered.
'''Perform multiple replacements.
Takes a list of delimited from and to values from a replacements file,
and replaces the from values with the to values in the target file(s).
The target files are not changed - new version(s) of the target file(s)
are written, with the filename prefixed.
Usage : Python multirep.py [options] replacementsfile targetfile(s)
Options: -h = help
-d = delimiter (defaults to tabs, s = spaces, c = commas)
-p = prefix for new files (defaults to 'New')
-c = case insensitive match
-w = replace whole words only'''
def multirep(arguments):
    '''Command-line driver: parse options, build a replacer, process files.

    arguments is the argument list without the program name (normally
    sys.argv[1:]).  After the options, the first positional argument is the
    replacements file and the remaining ones are target file names or glob
    patterns.  A new, prefixed copy is written next to each target file.

    Nothing is returned; usage and error messages are printed to stdout.
    '''
    import getopt, glob

    def showUsage():
        # __doc__ here is the module docstring, which doubles as usage text.
        print('')
        print(__doc__)

    # Split arguments list into options and arguments
    try:
        options, arguments = getopt.getopt(arguments, '?hd:p:wc')
    except getopt.error as problem:
        # Unknown option or missing option value: explain instead of
        # dying with a traceback.
        print('')
        print(str(problem))
        showUsage()
        return

    # Set defaults
    delimiter = '\t'
    prefix = 'New'
    wholeWords = None
    caseInsensitive = None
    wantHelp = False

    # Options - override defaults and show help
    for option, value in options:
        flag = option[-1]
        if flag in '?h':
            wantHelp = True
        elif flag == 'd':  # Specify delimiter
            if value == 's':
                delimiter = ' '
            elif value == 'c':
                delimiter = ','
        elif flag == 'p':  # Specify prefix
            prefix = value
        elif flag == 'w':  # Whole words
            wholeWords = 1
        elif flag == 'c':  # Case insensitive
            caseInsensitive = 1

    # Show the help text at most once, however it was requested.
    if wantHelp:
        showUsage()

    # No replacements file given: usage error (help may already be shown).
    if not arguments:
        if not wantHelp:
            showUsage()
        return

    # Build replacement function from replacements file
    try:
        replacer = MultiReplacer(arguments[0], delimiter, wholeWords,
                                 caseInsensitive)
    except (ValueError, IndexError):
        # Malformed line in the replacements file.
        print('')
        print('Invalid replacements file.')
        return

    # Expand remaining arguments into target file list
    patterns = arguments[1:]
    if not patterns:
        if not wantHelp:
            showUsage()
        return
    targetFiles = []
    for pattern in patterns:
        targetFiles.extend(glob.glob(pattern))

    # Perform replacement on each file
    for targetFile in targetFiles:
        replaceFile(targetFile, replacer, prefix)
def replaceFile(infile, replacer, prefix='New'):
    '''Write a copy of infile with replacer applied to its contents.

    infile   - path of the file to read (read in binary mode, so line
               endings are passed through untouched).
    replacer - callable taking the file contents and returning the new
               contents.
    prefix   - text prepended to the base name of infile to form the output
               file name; the output is written to the same directory.

    The input is read and transformed before the output file is opened, so
    a missing input file or a failing replacer does not leave an empty
    output file behind.
    '''
    import os

    # Build outfile name: same directory, prefixed base name
    directory, baseName = os.path.split(infile)
    outfile = os.path.join(directory, prefix + baseName)

    # Read from infile and replace values...
    with open(infile, 'rb') as source:
        contents = source.read()
    replaced = replacer(contents)

    # ...and only then create and write outfile
    with open(outfile, 'wb') as target:
        target.write(replaced)
class MultiReplacer:
    '''Callable that performs several replacements in one pass over a string.

    replacements    - either a mapping (or iterable of pairs) of from value
                      to to value, or the name of a text file holding one
                      delimited from/to pair per line.
    delimiter       - field separator used in the replacements file.
    wholeWords      - if true, from values only match between word
                      boundaries.
    caseInsensitive - if true, from values match regardless of case.

    Raises ValueError if a line of the replacements file has no delimiter
    or if any from value is empty.  Blank lines in the file are skipped and
    an empty to value (deleting the from value) is allowed.

    Calling the instance with a string returns the string with every from
    value replaced by its to value.  Because all replacements happen in a
    single pass, the output of one replacement is never re-replaced by
    another.
    '''

    def __init__(self, replacements, delimiter='\t', wholeWords=None,
                 caseInsensitive=None):
        import re

        self.caseInsensitive = caseInsensitive
        self.charMap = None
        self.rePattern = None

        # Build replacements dictionary - may come in as a mapping or as a
        # file name.  The type is tested explicitly: dict.update() on a
        # string raises ValueError, not TypeError, so it cannot be used to
        # tell the two cases apart.
        try:
            textTypes = basestring  # Python 2: str and unicode
        except NameError:
            textTypes = str         # Python 3
        self.replacements = {}
        if isinstance(replacements, textTypes):
            self._loadFile(replacements, delimiter)
        else:
            self.replacements.update(replacements)

        # An empty from value would match at every position.
        if '' in self.replacements:
            raise ValueError('empty from value in replacements')

        # Case-folded lookup table, used when the matched text differs in
        # case from the from value it matched.
        self.foldedReplacements = {}
        if caseInsensitive:
            for fromValue, toValue in self.replacements.items():
                self.foldedReplacements[fromValue.lower()] = toValue

        # Nothing to replace - __call__ returns its input unchanged.
        if not self.replacements:
            return

        # Char to char mapping - cheap translation table when possible
        if not wholeWords:
            self.charMap = self._buildCharMap()
            if self.charMap is not None:
                return

        # String to string mapping - use a regular expression.  Longest
        # from values come first so that 'foobar' wins over 'foo'.
        fromVals = sorted(self.replacements,
                          key=lambda value: (-len(value), value))
        rePattern = '|'.join([re.escape(value) for value in fromVals])
        if wholeWords:
            rePattern = r'\b(?:' + rePattern + r')\b'

        # Compile regexp
        if caseInsensitive:
            self.rePattern = re.compile(rePattern, re.I)
        else:
            self.rePattern = re.compile(rePattern)

    def _loadFile(self, filename, delimiter):
        '''Read delimited from/to pairs from filename into self.replacements.'''
        with open(filename, 'r') as replacementsFile:
            for line in replacementsFile:
                line = line.rstrip('\r\n')  # Strip newlines
                if not line:
                    continue  # Tolerate blank lines
                fields = line.split(delimiter)
                if len(fields) < 2:
                    raise ValueError('no delimiter in line: %r' % line)
                # Any fields beyond the first two are ignored.
                self.replacements[fields[0]] = fields[1]

    def _buildCharMap(self):
        '''Return a 256-entry translation table, or None if unsuitable.

        The table is only usable when every from and to value is a single
        character and every from character has an ordinal below 256.
        '''
        table = [chr(code) for code in range(256)]
        for fromValue, toValue in self.replacements.items():
            if len(fromValue) != 1 or len(toValue) != 1:
                return None
            if self.caseInsensitive:
                variants = (fromValue.upper(), fromValue.lower())
            else:
                variants = (fromValue,)
            for variant in variants:
                # Case conversion may yield several characters, and the
                # table only covers ordinals 0-255; fall back to the regexp.
                if len(variant) != 1 or ord(variant) > 255:
                    return None
                table[ord(variant)] = toValue
        return ''.join(table)

    def __call__(self, string):
        '''Return string with all replacements applied.'''
        # Char to char mapping
        if self.charMap is not None:
            return string.translate(self.charMap)
        # No replacements at all
        if self.rePattern is None:
            return string
        # String to string mapping
        return self.rePattern.sub(self.__replaceMatch, string)

    def __replaceMatch(self, match):
        '''Return the to value for the text matched by the regexp.'''
        item = match.group(0)
        if item in self.replacements:
            return self.replacements[item]
        if self.caseInsensitive:
            folded = item.lower()
            if folded in self.foldedReplacements:
                return self.foldedReplacements[folded]
        # No to value found (exotic case folding) - leave the text alone
        # rather than handing None back to re.sub.
        return item
# Script entry point: hand the command-line arguments (minus the program
# name) to multirep when this file is run directly rather than imported.
if __name__ == '__main__':
    import sys
    commandLine = sys.argv[1:]
    multirep(commandLine)
Cheers,
Simon Brunning
TriSystems Ltd.
sbrunning at trisystems.co.uk
-----------------------------------------------------------------------
The information in this email is confidential and may be legally privileged.
It is intended solely for the addressee. Access to this email by anyone else
is unauthorised. If you are not the intended recipient, any disclosure,
copying, distribution, or any action taken or omitted to be taken in
reliance on it, is prohibited and may be unlawful. TriSystems Ltd. cannot
accept liability for statements made which are clearly the senders own.
More information about the Python-list
mailing list