[Mailman-Developers] [PATCH] Port HyperArch/pipermail to mimelib

Ben Gertzfield che@debian.org
Thu, 13 Sep 2001 15:45:29 +0900


Here's a port of HyperArch and pipermail to mimelib.  This allows
proper parsing of multipart messages, and will make i18n handling much
easier.  This is a big step forward, I think, because now we no
longer have two very different Message classes in Mailman.

This patch depends on the mimelib patch I just sent; it uses the
get_decoded_payload() function I added to get a nice text
representation of even a multi-part message.  This will even let us
display a message for the non-text parts of a message, and will
eventually let HyperArch display attachments inline.  And of course, as
I mentioned in my previous mail, this will prevent base64 gobbledygook
from showing up in the archives.

This patch even deals with multiple text/* attachments to a message,
and will include them all in the archive even if they're base64 or
quoted-printable encoded.

It does not yet replace high-ASCII characters with HTML entities in
HyperArch; I'm going to deal with that next by taking the
htmlentitydefs module's entity dictionary, inverting it, and using that
as a big global search-and-replace when the charset is undefined or
iso-8859-1.

You can see an example message with lots of mixed parts in my
pipermail archive, at:

http://nausicaa.interq.or.jp/pipermail/test2/2001-September/000025.html

(The original message that produced this output is available at:
http://nausicaa.interq.or.jp/~ben/mime-test.txt)

This also patches pythonlib/mailbox.py and pythonlib/cgi.py to use
mimelib instead of rfc822.  These are the last uses of rfc822 in
Mailman, so we can now remove pythonlib/rfc822.py completely from the
archives -- now we use mimelib entirely!

Patch follows, against current Mailman CVS.

Index: Mailman/Archiver/HyperArch.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Archiver/HyperArch.py,v
retrieving revision 2.7
diff -u -r2.7 HyperArch.py
--- Mailman/Archiver/HyperArch.py	2001/07/26 05:26:48	2.7
+++ Mailman/Archiver/HyperArch.py	2001/09/13 06:19:47
 # Note: I'm overriding most, if not all of the pipermail Article class
 #       here -ddm
 # The Article class encapsulates a single posting.  The attributes are:
@@ -165,8 +164,8 @@
 
         # Snag the content-* headers.  RFC 1521 states that their values are
         # case insensitive.
-        ctype = message.getheader('Content-Type', 'text/plain')
-        cenc = message.getheader('Content-Transfer-Encoding', '')
+        ctype = message.gettype("text/plain")
+        cenc = message.get('Content-Transfer-Encoding', '')
         self.ctype = string.lower(ctype)
         self.cenc = string.lower(cenc)
         self.decoded = {}
@@ -283,42 +282,14 @@
             next = next_wsubj = ""
         return next, next_wsubj
 
-    _rx_quote = re.compile('=([A-F0-9][A-F0-9])')
-    _rx_softline = re.compile('=[ \t]*$')
-
     def _get_body(self):
         """Return the message body ready for HTML, decoded if necessary"""
         try:
             body = self.html_body
         except AttributeError:
             body = self.body
-        if self.charset is None or self.cenc != "quoted-printable":
-            return null_to_space(string.join(body, ""))
-        # the charset is specified and the body is quoted-printable
-        # first get rid of soft line breaks, then decode literals
-        lines = []
-        rx = self._rx_softline
-        for line in body:
-            mo = rx.search(line)
-            if mo:
-                i = string.rfind(line, "=")
-                line = line[:i]
-            lines.append(line)
-        buf = string.join(lines, "")
-        
-        chunks = []
-        offset = 0
-        rx = self._rx_quote
-        while 1:
-            mo = rx.search(buf, offset)
-            if not mo:
-                chunks.append(buf[offset:])
-                break
-            i = mo.start()
-            chunks.append(buf[offset:i])
-            offset = i + 3
-            chunks.append(chr(string.atoi(mo.group(1), 16)))
-        return null_to_space(string.join(chunks, ""))
+
+        return null_to_space(string.join(body, ""))
 
     def _add_decoded(self, d):
         """Add encoded-word keys to HTML output"""
Index: Mailman/Archiver/pipermail.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Archiver/pipermail.py,v
retrieving revision 2.8
diff -u -r2.8 pipermail.py
--- Mailman/Archiver/pipermail.py	2001/08/17 05:41:25	2.8
+++ Mailman/Archiver/pipermail.py	2001/09/13 06:19:47
@@ -7,6 +7,11 @@
 import string
 import time
 
+from Mailman.pythonlib.StringIO import StringIO
+
+from mimelib.date import parsedate_tz
+from mimelib.address import parseaddr
+
 try:
     import cPickle
     pickle = cPickle
@@ -20,7 +25,6 @@
 from Mailman.Mailbox import Mailbox
 from Mailman.i18n import _
 
-
 
 msgid_pat = re.compile(r'(<.*>)')
 def strip_separators(s):
@@ -155,7 +159,7 @@
 	self.parentID = None
         self.threadKey = None
 	# otherwise the current sequence number is used.
-	id = strip_separators(message.getheader('Message-Id'))
+	id = strip_separators(message['Message-Id'])
 	if id == "":
             self.msgid = str(self.sequence)
 	else: self.msgid = id
@@ -169,8 +173,8 @@
         self._set_date(message)
 
 	# Figure out the e-mail address and poster's name
-	self.author, self.email = message.getaddr('From')
-	e = message.getheader('Reply-To')
+	self.author, self.email = parseaddr(message['From'])
+	e = message['Reply-To']
 	if e is not None:
             self.email = e
 	self.email = strip_separators(self.email)
@@ -184,11 +188,11 @@
         # shouldn't be necessary, but changing this may break code.  For
         # safety, I save the original headers on different attributes for use
         # in writing the plain text periodic flat files.
-        self._in_reply_to = message.getheader('in-reply-to')
-        self._references = message.getheader('references')
-        self._message_id = message.getheader('message-id')
+        self._in_reply_to = message['in-reply-to']
+        self._references = message['references']
+        self._message_id = message['message-id']
 
-	i_r_t = message.getheader('In-Reply-To')
+	i_r_t = message['In-Reply-To']
 	if i_r_t is None:
             self.in_reply_to = ''
 	else:
@@ -196,7 +200,7 @@
 	    if match is None: self.in_reply_to = ''
 	    else: self.in_reply_to = strip_separators(match.group(1))
 		
-	references = message.getheader('References')
+	references = message['References']
 	if references is None:
             self.references = []
 	else:
@@ -209,14 +213,12 @@
 	    if message.has_key(i):
                 self.headers[i] = message[i]
 
-	# Read the message body
-	message.rewindbody()
-        self.body = message.fp.readlines()
+        self.body = message.get_decoded_payload()
 
     def _set_date(self, message):
 	if message.has_key('Date'): 
 	    self.datestr = str(message['Date'])
-   	    date = message.getdate_tz('Date')
+   	    date = parsedate_tz(self.datestr)
 	else: 
 	    self.datestr = ''
 	    date = None
Index: Mailman/pythonlib/cgi.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/pythonlib/cgi.py,v
retrieving revision 2.0
diff -u -r2.0 cgi.py
--- Mailman/pythonlib/cgi.py	2000/12/07 16:53:20	2.0
+++ Mailman/pythonlib/cgi.py	2001/09/13 06:19:47
@@ -30,7 +30,8 @@
 import os
 import urllib
 import mimetools
-import rfc822
+from Mailman import Message
+from mimelib import Parser
 import UserDict
 from StringIO import StringIO
 
@@ -462,6 +463,7 @@
         self.filename = None
         if pdict.has_key('filename'):
             self.filename = pdict['filename']
+        self.parser = Parser.Parser(Message.Message)
 
         # Process content-type header
         #
Index: Mailman/pythonlib/mailbox.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/pythonlib/mailbox.py,v
retrieving revision 1.1
diff -u -r1.1 mailbox.py
--- Mailman/pythonlib/mailbox.py	2001/02/15 06:09:38	1.1
+++ Mailman/pythonlib/mailbox.py	2001/09/13 06:19:47
@@ -3,13 +3,15 @@
 """Classes to handle Unix style, MMDF style, and MH style mailboxes."""
 
 
-import rfc822
+from Mailman import Message
+from mimelib import Parser
 import os
 
 __all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox"]
+parser = Parser.Parser(Message.Message)
 
 class _Mailbox:
-    def __init__(self, fp, factory=rfc822.Message):
+    def __init__(self, fp, factory=parser.parse):
         self.fp = fp
         self.seekp = 0
         self.factory = factory
@@ -184,7 +186,7 @@
 
 
 class MHMailbox:
-    def __init__(self, dirname, factory=rfc822.Message):
+    def __init__(self, dirname, factory=parser.parse):
         import re
         pat = re.compile('^[1-9][0-9]*$')
         self.dirname = dirname
@@ -211,7 +213,7 @@
 class Maildir:
     # Qmail directory mailbox
 
-    def __init__(self, dirname, factory=rfc822.Message):
+    def __init__(self, dirname, factory=parser.parse):
         self.dirname = dirname
         self.factory = factory