[Spambayes-checkins] spambayes unheader.py,NONE,1.1
Skip Montanaro
montanaro@users.sourceforge.net
Fri, 06 Sep 2002 22:50:44 -0700
Update of /cvsroot/spambayes/spambayes
In directory usw-pr-cvs1:/tmp/cvs-serv19376
Added Files:
unheader.py
Log Message:
script to remove unwanted headers from mbox files
--- NEW FILE: unheader.py ---
#!/usr/bin/env python
import re
import sys
import mailbox
import email.Parser
import email.Message
import getopt
def unheader(msg, pat):
pat = re.compile(pat)
for hdr in msg.keys():
if pat.match(hdr):
del msg[hdr]
class Message(email.Message.Message):
def replace_header(self, hdr, newval):
"""replace first value for hdr with newval"""
hdr = hdr.lower()
for (i, (k, v)) in enumerate(self._headers):
if k.lower() == hdr:
self._headers[i] = (k, newval)
class Parser(email.Parser.Parser):
def __init__(self):
email.Parser.Parser.__init__(self, Message)
def deSA(msg):
if msg['X-Spam-Status']:
if msg['X-Spam-Status'].startswith('Yes'):
pct = msg['X-Spam-Prev-Content-Type']
if pct:
msg['Content-Type'] = pct
pcte = msg['X-Spam-Prev-Content-Transfer-Encoding']
if pcte:
msg['Content-Transfer-Encoding'] = pcte
subj = re.sub(r'\*\*\*\*\*SPAM\*\*\*\*\* ', '', msg['Subject'])
if subj != msg["Subject"]:
msg.replace_header("Subject", subj)
body = msg.get_payload()
newbody = []
at_start = 1
for line in body.splitlines():
if at_start and line.startswith('SPAM: '):
continue
elif at_start:
at_start = 0
else:
newbody.append(line)
msg.set_payload("\n".join(newbody))
unheader(msg, "X-Spam-")
def process_mailbox(f, dosa=1, pats=None):
for msg in mailbox.PortableUnixMailbox(f, Parser().parse):
if pats is not None:
unheader(msg, pats)
if dosa:
deSA(msg)
print msg
def usage():
print >> sys.stderr, "usage: unheader.py [ -p pat ... ] [ -s ]"
print >> sys.stderr, "-p pat gives a regex pattern used to eliminate unwanted headers"
print >> sys.stderr, "'-p pat' may be given multiple times"
print >> sys.stderr, "-s tells not to remove SpamAssassin headers"
def main(args):
headerpats = []
dosa = 1
try:
opts, args = getopt.getopt(args, "p:sh")
except getopt.GetoptError:
usage()
sys.exit(1)
else:
for opt, arg in opts:
if opt == "-h":
usage()
sys.exit(0)
elif opt == "-p":
headerpats.append(arg)
elif opt == "-s":
dosa = 0
pats = headerpats and "|".join(headerpats) or None
if not args:
f = sys.stdin
elif len(args) == 1:
f = file(args[0])
else:
usage()
sys.exit(1)
process_mailbox(f, dosa, pats)
if __name__ == "__main__":
main(sys.argv[1:])