[Mailman-Developers] [PATCH] Port HyperArch/pipermail to mimelib
Ben Gertzfield
che@debian.org
Thu, 13 Sep 2001 15:45:29 +0900
Here's a port of HyperArch and pipermail to mimelib. This allows
proper parsing of multipart messages, and will make i18n handling much
easier. This is a big step forward, I think, because now we no
longer have two very different Message classes in Mailman.
This patch depends on the mimelib patch I just sent; it uses the
get_decoded_payload() function I added to get a nice text
representation of even a multi-part message. This will even let us
display a notice for the non-text parts of a message, and will eventually
let HyperArch display attachments inline. And of course, as I
mentioned in my previous mail, this will prevent base64 gobbledygook
from showing up in the archives.
This patch even deals with multiple text/* attachments to a message,
and will include them all in the archive even if they're base64 or
quoted-printable encoded.
It currently does not deal with replacing high-ASCII characters with
HTML entities in HyperArch; I'm going to deal with that next by taking
the htmlentitydefs module's entity dictionary, inverting it, and using
the result for a big global search-and-replace when the charset is
undefined or iso-8859-1.
You can see an example message with lots of mixed parts in my
pipermail archive, at:
http://nausicaa.interq.or.jp/pipermail/test2/2001-September/000025.html
(The original message that produced this output is available at:
http://nausicaa.interq.or.jp/~ben/mime-test.txt).
This also patches pythonlib/mailbox.py to use mimelib instead of
rfc822. This is the last use of rfc822 in Mailman, so we can now
remove pythonlib/rfc822.py completely from the archives -- now we use
mimelib entirely!
Patch follows, against current Mailman CVS.
Index: Mailman/Archiver/HyperArch.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Archiver/HyperArch.py,v
retrieving revision 2.7
diff -u -r2.7 HyperArch.py
--- Mailman/Archiver/HyperArch.py 2001/07/26 05:26:48 2.7
+++ Mailman/Archiver/HyperArch.py 2001/09/13 06:19:47
# Note: I'm overriding most, if not all of the pipermail Article class
# here -ddm
# The Article class encapsulates a single posting. The attributes are:
@@ -165,8 +164,8 @@
# Snag the content-* headers. RFC 1521 states that their values are
# case insensitive.
- ctype = message.getheader('Content-Type', 'text/plain')
- cenc = message.getheader('Content-Transfer-Encoding', '')
+ ctype = message.gettype("text/plain")
+ cenc = message.get('Content-Transfer-Encoding', '')
self.ctype = string.lower(ctype)
self.cenc = string.lower(cenc)
self.decoded = {}
@@ -283,42 +282,14 @@
next = next_wsubj = ""
return next, next_wsubj
- _rx_quote = re.compile('=([A-F0-9][A-F0-9])')
- _rx_softline = re.compile('=[ \t]*$')
-
def _get_body(self):
"""Return the message body ready for HTML, decoded if necessary"""
try:
body = self.html_body
except AttributeError:
body = self.body
- if self.charset is None or self.cenc != "quoted-printable":
- return null_to_space(string.join(body, ""))
- # the charset is specified and the body is quoted-printable
- # first get rid of soft line breaks, then decode literals
- lines = []
- rx = self._rx_softline
- for line in body:
- mo = rx.search(line)
- if mo:
- i = string.rfind(line, "=")
- line = line[:i]
- lines.append(line)
- buf = string.join(lines, "")
-
- chunks = []
- offset = 0
- rx = self._rx_quote
- while 1:
- mo = rx.search(buf, offset)
- if not mo:
- chunks.append(buf[offset:])
- break
- i = mo.start()
- chunks.append(buf[offset:i])
- offset = i + 3
- chunks.append(chr(string.atoi(mo.group(1), 16)))
- return null_to_space(string.join(chunks, ""))
+
+ return null_to_space(string.join(body, ""))
def _add_decoded(self, d):
"""Add encoded-word keys to HTML output"""
Index: Mailman/Archiver/pipermail.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/Archiver/pipermail.py,v
retrieving revision 2.8
diff -u -r2.8 pipermail.py
--- Mailman/Archiver/pipermail.py 2001/08/17 05:41:25 2.8
+++ Mailman/Archiver/pipermail.py 2001/09/13 06:19:47
@@ -7,6 +7,11 @@
import string
import time
+from Mailman.pythonlib.StringIO import StringIO
+
+from mimelib.date import parsedate_tz
+from mimelib.address import parseaddr
+
try:
import cPickle
pickle = cPickle
@@ -20,7 +25,6 @@
from Mailman.Mailbox import Mailbox
from Mailman.i18n import _
-
msgid_pat = re.compile(r'(<.*>)')
def strip_separators(s):
@@ -155,7 +159,7 @@
self.parentID = None
self.threadKey = None
# otherwise the current sequence number is used.
- id = strip_separators(message.getheader('Message-Id'))
+ id = strip_separators(message['Message-Id'])
if id == "":
self.msgid = str(self.sequence)
else: self.msgid = id
@@ -169,8 +173,8 @@
self._set_date(message)
# Figure out the e-mail address and poster's name
- self.author, self.email = message.getaddr('From')
- e = message.getheader('Reply-To')
+ self.author, self.email = parseaddr(message['From'])
+ e = message['Reply-To']
if e is not None:
self.email = e
self.email = strip_separators(self.email)
@@ -184,11 +188,11 @@
# shouldn't be necessary, but changing this may break code. For
# safety, I save the original headers on different attributes for use
# in writing the plain text periodic flat files.
- self._in_reply_to = message.getheader('in-reply-to')
- self._references = message.getheader('references')
- self._message_id = message.getheader('message-id')
+ self._in_reply_to = message['in-reply-to']
+ self._references = message['references']
+ self._message_id = message['message-id']
- i_r_t = message.getheader('In-Reply-To')
+ i_r_t = message['In-Reply-To']
if i_r_t is None:
self.in_reply_to = ''
else:
@@ -196,7 +200,7 @@
if match is None: self.in_reply_to = ''
else: self.in_reply_to = strip_separators(match.group(1))
- references = message.getheader('References')
+ references = message['References']
if references is None:
self.references = []
else:
@@ -209,14 +213,12 @@
if message.has_key(i):
self.headers[i] = message[i]
- # Read the message body
- message.rewindbody()
- self.body = message.fp.readlines()
+ self.body = message.get_decoded_payload()
def _set_date(self, message):
if message.has_key('Date'):
self.datestr = str(message['Date'])
- date = message.getdate_tz('Date')
+ date = parsedate_tz(self.datestr)
else:
self.datestr = ''
date = None
Index: Mailman/pythonlib/cgi.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/pythonlib/cgi.py,v
retrieving revision 2.0
diff -u -r2.0 cgi.py
--- Mailman/pythonlib/cgi.py 2000/12/07 16:53:20 2.0
+++ Mailman/pythonlib/cgi.py 2001/09/13 06:19:47
@@ -30,7 +30,8 @@
import os
import urllib
import mimetools
-import rfc822
+from Mailman import Message
+from mimelib import Parser
import UserDict
from StringIO import StringIO
@@ -462,6 +463,7 @@
self.filename = None
if pdict.has_key('filename'):
self.filename = pdict['filename']
+ self.parser = Parser.Parser(Message.Message)
# Process content-type header
#
Index: Mailman/pythonlib/mailbox.py
===================================================================
RCS file: /cvsroot/mailman/mailman/Mailman/pythonlib/mailbox.py,v
retrieving revision 1.1
diff -u -r1.1 mailbox.py
--- Mailman/pythonlib/mailbox.py 2001/02/15 06:09:38 1.1
+++ Mailman/pythonlib/mailbox.py 2001/09/13 06:19:47
@@ -3,13 +3,15 @@
"""Classes to handle Unix style, MMDF style, and MH style mailboxes."""
-import rfc822
+from Mailman import Message
+from mimelib import Parser
import os
__all__ = ["UnixMailbox","MmdfMailbox","MHMailbox","Maildir","BabylMailbox"]
+parser = Parser.Parser(Message.Message)
class _Mailbox:
- def __init__(self, fp, factory=rfc822.Message):
+ def __init__(self, fp, factory=parser.parse):
self.fp = fp
self.seekp = 0
self.factory = factory
@@ -184,7 +186,7 @@
class MHMailbox:
- def __init__(self, dirname, factory=rfc822.Message):
+ def __init__(self, dirname, factory=parser.parse):
import re
pat = re.compile('^[1-9][0-9]*$')
self.dirname = dirname
@@ -211,7 +213,7 @@
class Maildir:
# Qmail directory mailbox
- def __init__(self, dirname, factory=rfc822.Message):
+ def __init__(self, dirname, factory=parser.parse):
self.dirname = dirname
self.factory = factory