Python:Email and Header Parsing: Some Help

Paul McGuire ptmcg at users.sourceforge.net
Thu Feb 26 04:42:02 EST 2004


"dont bother" <dontbotherworld at yahoo.com> wrote in message
news:mailman.149.1077785503.8594.python-list at python.org...
> I want to parse the headers separately and the message
> separately. Does anyone have example code to deal
> with Parser?
Here is a spam cleaner that I run several times a day.  My ISP runs Symantec
on their end and tags suspect e-mails with virus header tags.  This program
looks for those tags and auto-deletes any Klez- or Swen-infected e-mails.


import poplib, re

# Change this to your needs
# NOTE(review): the account password is stored here in plain text; consider
# prompting for it or reading it from the environment instead.
POPHOST = "pop-server.austin.rr.com"
POPUSER = "xyzzy"
POPPASS = "ajsdlfjslfkj"

# Regular expressions for extracting header data.  Each pattern is anchored
# at the start of a header line and captures the field body in group 1.
# Field names are matched case-insensitively and the whitespace after the
# colon is optional, so "from:alice" is recognised as well as "From: alice".
re_from        = re.compile( r"^From:[ \t]*(.*)", re.IGNORECASE )
re_to          = re.compile( r"^To:[ \t]*(.*)", re.IGNORECASE )
re_subject     = re.compile( r"^Subject:[ \t]*(.*)", re.IGNORECASE )
re_virusresult = re.compile( r"^X-Virus-Scan-Result:[ \t]*(.*)", re.IGNORECASE )

def showMessage( msgHdr ):
  """Print a one-line, fixed-width summary of a message header.

  msgHdr is a dict that must provide the keys "msgnum" (a number, shown
  right-aligned in 3 columns), "From" (padded or truncated to 30 characters),
  "Subject" and "Virus" (each padded or truncated to 24 characters).
  """
  out = ( msgHdr["msgnum"], msgHdr["From"], msgHdr["Subject"],
          msgHdr["Virus"] )
  # A single parenthesised argument prints the same text whether print is a
  # statement (Python 2) or a function (Python 3).
  print( "%3d. %-30.30s   %-24.24s %-24.24s" % out )

def scanMailboxMsgs():
  """Log in to the POP3 mailbox and delete virus-tagged messages.

  Only the header of each message is fetched.  The From, To, Subject and
  X-Virus-Scan-Result fields are extracted, and every message whose virus
  field mentions W32.Swen or W32.Klez is printed with showMessage(), marked
  for deletion, and counted in the module-level deleteCount.

  POP3 protocol errors (poplib.error_proto) are reported on stdout instead
  of being raised; the session is still closed afterwards if it was opened.
  """
  global deleteCount

  # Defaults for a message whose header lacks one of the fields.
  emptyHdr = {
    "From" : "",
    "To" : "",
    "Subject" : "",
    "Virus" : "none"
    }
  # Each pattern is paired with the hdr key that receives its captured value.
  matchREs = [
    ( re_from, "From" ),
    ( re_to, "To" ),
    ( re_subject, "Subject" ),
    ( re_virusresult, "Virus" )
    ]

  # Stays None if the connection cannot be opened, so the cleanup at the end
  # never refers to an unbound name.
  pop = None
  try:
    # log in to mail box
    pop = poplib.POP3(POPHOST)
    pop.user(POPUSER)
    pop.pass_(POPPASS)

    # stat() returns (message count, mailbox size); only the count is needed
    msgCount = pop.stat()[0]

    # msg nums are 1-based, not 0-based
    for msgnum in range( 1, msgCount + 1 ):
      # Retrieve the message header only (0 lines of the body)
      _, headerLines, octets = pop.top(msgnum, 0)

      hdr = emptyHdr.copy()
      hdr["msgnum"] = msgnum
      hdr["size"] = octets
      for line in headerLines:
        if not isinstance( line, str ):
          # poplib hands back bytes on Python 3; latin-1 accepts every byte
          # value, so decoding cannot fail on malformed headers.
          line = line.decode( "latin-1" )
        for reExpr, hdrField in matchREs:
          match = reExpr.match( line )
          if match:
            hdr[ hdrField ] = match.group(1).strip('"')

      # auto-delete any msgs tagged with the W32.Swen or W32.Klez virus
      virus = hdr["Virus"]
      if "W32.Swen" in virus or "W32.Klez" in virus:
        showMessage( hdr )
        pop.dele(msgnum)
        deleteCount += 1

  except poplib.error_proto as detail:
    print( "POP3 error: %s" % (detail,) )

  # Sign off whenever a connection was opened, including after a failed
  # login.  Per the poplib documentation quit() commits the pending changes
  # (the deletions) and drops the connection.
  if pop is not None:
    pop.quit()


# ============= main script ===============
# deleteCount is incremented by scanMailboxMsgs() through a global statement,
# so it has to exist at module level before the scan starts.
deleteCount = 0
scanMailboxMsgs()
print( "Deleted %d messages" % deleteCount )

# Wait for the user before exiting.  raw_input only exists on Python 2;
# Python 3 provides the same behaviour under the name input.
try:
  waitForReturn = raw_input
except NameError:
  waitForReturn = input
waitForReturn( "Press <return> to continue" )





More information about the Python-list mailing list