[Mailman-Developers] Quoted printable umlaut quick hack

Peter Funk pf@artcom-gmbh.de
Mon, 7 Feb 2000 17:52:31 +0100 (MET)


I'm running mailman 1.1 on our intranet web server.

Since our lists are in German, umlauts occur
quite often in mail bodies and subjects: äöü ÄÖÜ ß.
Depending on the MUA used, these are often encoded
as quoted-printable and end up as ugly trash in the
pipermail HTML archives: F=FCr instead of Für ...
Unfortunately this does not only look ugly but also
prevents HTdig (the search engine) from finding any words with
umlauts in the pipermail archives.

So today I made some quick hacks to HyperArch.py which at
least work for me.  (I have spent too much time on this detail already.)
My dirty patch against HyperArch.py is appended below.
Maybe this will help somebody to do this right.

Regards, Peter
-- 
Peter Funk, Oldenburger Str.86, 27777 Ganderkesee, Tel: 04222 9502 70, Fax: -60
---- 8< ---- 8< ---- cut here ---- 8< ---- schnipp ---- 8< ---- schnapp ----
--- /home/pf/freeware/mailman-1.1/Mailman/Archiver/HyperArch.py Sat Aug 21 07:13:23 1999
+++ HyperArch.py        Mon Feb  7 17:36:07 2000
@@ -34,6 +34,7 @@
 import sys
 import re, cgi, urllib, string
 import time, pickle, os, posixfile
+import quopri, cStringIO
 import HyperDatabase
 import pipermail
 from Mailman import mm_cfg
@@ -49,6 +50,7 @@
     except ImportError:
         pass
 
+umlaut_in_subj = re.compile(r"=\?iso-8859-1\?Q\?(.*)\?=", re.IGNORECASE)
 
 def html_quote(s):
     repls = ( ('&', '&amp;'),
@@ -228,6 +230,16 @@
                self.subject=self.subject[i:]
            else: i=-1
        if self.subject=="": self.subject='No subject'
+       # --Pefus quick Hack:
+       umls = umlaut_in_subj.search(self.subject)
+       if umls:
+           umlconvin = cStringIO.StringIO(
+                                  umlaut_in_subj.sub(r"\1", self.subject))
+           umlconvout = cStringIO.StringIO()
+           quopri.decode(umlconvin, umlconvout)
+           umlconvout.seek(0)
+           self.subject = string.join(umlconvout.read(), "")
+       # --
 
        if message.has_key('Date'): 
            self.datestr=str(message['Date'])
@@ -239,7 +251,12 @@
            date, tzoffset=date[:9], date[-1] 
             if not tzoffset:
                 tzoffset = 0
-           date=time.mktime(date)-tzoffset
+           try:
+               date=time.mktime(date)-tzoffset
+           except ValueError:
+               print "illegal date discovered:", date
+               date=time.time()
+
        else:
            date=self.__last_article_time+1 
            
@@ -276,8 +293,13 @@
        # Read the message body
        self.body=[]
        message.rewindbody()
+       converterInput=cStringIO.StringIO(
+              string.join(message.fp.readlines(), ""))
+       converterOutput=cStringIO.StringIO()
+       quopri.decode(converterInput, converterOutput)
+       converterOutput.seek(0)
        while (1):
-           line=message.fp.readline()
+           line=converterOutput.readline()
            if line=="": break
            self.body.append(line)