[Tutor] Newbie - Simple mailing list archiver
Barnaby Scott
bds@waywood.co.uk
Sun Mar 30 21:13:01 2003
I have to apologise - since I posted my first message I have be wrestling
solidly with this problem, and have managed to write my script. I suspect it
contains mistakes - some of which will probably appear incredibly stupid!
May I therefore change my request to asking for comments on what I have
done, and pointers to things which are wrong.
One thing to bear in mind - I am going to have to run this on a machine
which only has version 1.5.2 (until I can persuade them to upgrade), so I
have taken that version's documentation as my guide.
Thanks in advance for any pointers - please be kind though, it's my first
effort.
import sys, string, rfc822, mimetools, multifile
#paths
pathtolookupfile = '/I/Will/Fill/This/In Later/'
pathtowikifiles = '/Assuming/I/Remeber/'
#Read mail message from STDIN
msg = rfc822.Message(sys.stdin)
#Get these header values:
#From
fromtuple = msg.getaddr('From')
from = fromtuple[0]
#Subject (or specify 'no subject')
subject = msg.getheader('Subject', 'no subject')
#Message-ID
messageid = msg.getheader('Message-ID')
#Date (in a short, consistent format)
datetuple = msg.getdate('Date')
if datetuple == None:
datetuple = time.localtime()
date = `datetuple[2]` + '/' + `datetuple[1]` + '/' + `datetuple[0]`
#In-Reply-To, or failing that the last value in References, if either exist
if msg.has_key('In-Reply-To'):
parentmailid = msg.getheader('In-Reply-To')
elif msg.has_key('References'):
references = msg.getheader('References')
refsl = string.split(references)
parentmailid = refsl.pop()
else:
parentmailid = None
#Content-Type
contenttype = msg.getheader('Content-Type', 'text/plain')
#If Content-Type is multipart/*, get only the body section that is
text/plain
if contenttype[:10] == "multipart/":
mimemsg = mimetools.Message(sys.__stdin__)
boundary = mimemsg.getparam('boundary')
mf = multifile.MultiFile(sys.__stdin__)
mf.push(boundary)
while mf.next():
msgpart = mimetools.Message(mf)
if msgpart.gettype() == 'text/plain':
bodytext = msgpart.fp.read()
break
#Else if Content-Type is text/plain, get the body text
elif contenttype[:10] == "text/plain":
bodytext = msg.fp.read()
else:
bodytext = 'no body text suitable for archive'
#Open my 'messageIDs' file (a lookup file which stores, from previous
messages, the value pairs: Original Message-ID, An integer messageID)
msgids = {}
f1 = open(pathtolookupfile + "messageIDs", 'w')
msgids = pickle.load(f1)
#Find there the highest existing integer messageID and generate a new one by
adding 1
l1 = msgids.values()
newintid = max(l1) + 1
#Append to the 'messageIDs' file:
# This message's Message-ID, Our newly generated integer messageID
msgids.append(messageid: newintid)
pickle.dump(msgids, f1)
f1.close()
#Open my 'ArchiveIndex' file (a wiki page which lists all messages in
threads, with a specific method of indenting)
f2 = open("ArchiveIndex", 'w')
#If there is an In-Reply-To or References value
if parentmailid != None:
# Look in the 'messageIDs' dictionary for this Message-ID and return that
message's corresponding integer messageID
parentintid = msgids[parentmailid]
# Look in the 'ArchiveIndex' file to find this integer at the beginning of a
line (save for preceding spaces and '*')
f2 = open("pathtowikifiles/ArchiveIndex", 'w')
l2 = f2.readlines()
for s in l2:
p = string.find('*' + `parentintid` + ':')
if p != -1:
indentspaces = len(s) - len(string.lstrip(s)) + 1
insertpos = l2.index(s) + 1
break
else:
indentspaces = 1
insertpos = 0
#Else
else:
indentspaces = 1
insertpos = 0
#write new line to wiki page ArchiveIndex
newarchiveentry = ' ' * indentspaces + '*' + `newintid` + ': ' + subject + '
' + from + ' ' + date + ' [ArchivedMessage' + `newintid` + ' ' + 'View]\n\n'
l2.insert(insertpos, newarchiveentry)
f2.write(l2)
f2.close()
#Create and open a new file called 'ArchivedMessage' + integer messageID
f3 = open("pathtowikifiles/ArchivedMessage" + `newintid`, 'w')
#Write to this file:
# From
# Subject
# Date
# plain text body
f3.write(senderemail + '/n/n' + subject + '/n/n' + date + '/n/n' + bodytext)
#Close the 'ArchivedMessage?' file
f3.close()