Python:Email and Header Parsing: Some Help
Paul McGuire
ptmcg at users.sourceforge.net
Thu Feb 26 04:42:02 EST 2004
"dont bother" <dontbotherworld at yahoo.com> wrote in message
news:mailman.149.1077785503.8594.python-list at python.org...
> I want to parse the headers separately and the message
> separately. Does anyone have example code to deal
> with Parser?
Here is a spam cleaner that I run several times a day. My ISP runs Symantec
on their end and tags suspect e-mails with virus header tags. This program
looks for those tags and auto-deletes any Klez- or Swen-infected e-mails.
import poplib, re
# Change this to your needs
POPHOST = "pop-server.austin.rr.com"
POPUSER = "xyzzy"
# NOTE(review): the password is kept in plain text in the source file;
# consider reading it from the environment or prompting with getpass.
POPPASS = "ajsdlfjslfkj"

# Regular expressions for extracting header data.  group(1) is the header
# value.  Header field names are matched case-insensitively and the
# whitespace after the colon is optional, so "FROM:x" and "From: x" both
# match and yield "x".
re_from = re.compile(r"^From:[ \t]*(.*)", re.IGNORECASE)
re_to = re.compile(r"^To:[ \t]*(.*)", re.IGNORECASE)
re_subject = re.compile(r"^Subject:[ \t]*(.*)", re.IGNORECASE)
re_virusresult = re.compile(r"^X-Virus-Scan-Result:[ \t]*(.*)", re.IGNORECASE)
def showMessage(msgHdr):
    """Print a one-line, fixed-width summary of a message header.

    msgHdr is a dict that must contain the keys "msgnum" (an integer),
    "From", "Subject" and "Virus" (strings).  The text columns are padded
    or truncated to 30, 24 and 24 characters respectively.
    """
    out = (
        msgHdr["msgnum"],
        msgHdr["From"],
        msgHdr["Subject"],
        msgHdr["Virus"],
    )
    # Single-argument print(...) gives the same output on Python 2 and 3.
    print("%3d. %-30.30s %-24.24s %-24.24s" % out)
def scanMailboxMsgs():
    """Scan the POP3 mailbox and delete messages tagged as virus-infected.

    Logs in to POPHOST with POPUSER/POPPASS, retrieves only the headers of
    each message, and extracts the From, To, Subject and
    X-Virus-Scan-Result values using the module-level regexes.  Any message
    whose virus tag contains "W32.Swen" or "W32.Klez" is printed with
    showMessage(), marked for deletion on the server, and counted in the
    global deleteCount.

    POP3 protocol errors raised while logging in or scanning are reported
    on stdout instead of being propagated.  Whenever a connection was
    opened, QUIT is sent afterwards.
    """
    global deleteCount

    # Bound before the try block so the cleanup at the bottom is safe even
    # when connecting or logging in fails.
    pop = None

    emptyHdr = {
        "From": "",
        "To": "",
        "Subject": "",
        "Virus": "none",
    }
    matchREs = [
        (re_from, "From"),
        (re_to, "To"),
        (re_subject, "Subject"),
        (re_virusresult, "Virus"),
    ]

    try:
        # log in to mail box
        pop = poplib.POP3(POPHOST)
        pop.user(POPUSER)
        pop.pass_(POPPASS)

        # stat() returns (message count, mailbox size); only the count is used
        msgCount = pop.stat()[0]

        # msg nums are 1-based, not 0-based
        for msgnum in range(1, msgCount + 1):
            # top(msgnum, 0) retrieves the header plus zero body lines
            headerLines, octets = pop.top(msgnum, 0)[1:3]
            hdr = emptyHdr.copy()
            hdr["msgnum"] = msgnum
            hdr["size"] = octets

            for line in headerLines:
                # On Python 3 poplib returns bytes; latin-1 decoding never
                # fails, so every line can be matched against the str
                # patterns.  On Python 2 the lines are already str.
                if not isinstance(line, str):
                    line = line.decode("latin-1")
                for reExpr, hdrField in matchREs:
                    match = reExpr.match(line)
                    if match:
                        hdr[hdrField] = match.group(1).strip('"')

            # auto-delete any msgs tagged with the W32.Swen or W32.Klez virus
            virusTag = hdr["Virus"]
            if "W32.Swen" in virusTag or "W32.Klez" in virusTag:
                showMessage(hdr)
                pop.dele(msgnum)
                deleteCount += 1
    except poplib.error_proto as detail:
        print("POP3 error: %s" % (detail,))

    # Always end the session if one was opened, including after a failed
    # login.
    if pop is not None:
        pop.quit()
# ============= main script ===============
# deleteCount is the module-level counter that scanMailboxMsgs() increments
# for every message it deletes.
deleteCount = 0
scanMailboxMsgs()
print("Deleted %d messages" % deleteCount)

# Keep the console window open until the user presses <return>.
# raw_input exists only on Python 2; Python 3 renamed it to input.
try:
    _prompt = raw_input
except NameError:
    _prompt = input
_prompt("Press <return> to continue")
More information about the Python-list
mailing list