[Python-checkins] python/dist/src/Lib/email _parseaddr.py,NONE,1.5.2.1 Charset.py,1.7.2.3,1.7.2.4 Generator.py,1.6.10.3,1.6.10.4 Header.py,1.13.2.2,1.13.2.3 MIMEText.py,1.3.10.1,1.3.10.2 Message.py,1.9.6.2,1.9.6.3 Parser.py,1.5.10.3,1.5.10.4 Utils.py,1.9.6.1,1.9.6.2 __init__.py,1.4.10.4,1.4.10.5 _compat21.py,1.4.2.1,1.4.2.2 _compat22.py,1.4.2.1,1.4.2.2 base64MIME.py,1.5.2.1,1.5.2.2 quopriMIME.py,1.4.2.1,1.4.2.2

bwarsaw@users.sourceforge.net
Fri, 21 Mar 2003 13:09:34 -0800


Update of /cvsroot/python/python/dist/src/Lib/email
In directory sc8-pr-cvs1:/tmp/cvs-serv27730/Lib/email

Modified Files:
      Tag: release22-maint
	Charset.py Generator.py Header.py MIMEText.py Message.py 
	Parser.py Utils.py __init__.py _compat21.py _compat22.py 
	base64MIME.py quopriMIME.py 
Added Files:
      Tag: release22-maint
	_parseaddr.py 
Log Message:
Backporting email 2.5 to Python 2.2 maint branch.


--- NEW FILE: _parseaddr.py ---
# Copyright (C) 2002 Python Software Foundation

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""

import time
from types import TupleType

try:
    True, False
except NameError:
    True = 1
    False = 0

SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }


def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Accounts for military timezones.
    """
    data = data.split()
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i < 0:
            return None
        data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if _timezones.has_key(tz):
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset/100)*3600 + (tzoffset % 100)*60)
    tuple = (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
    return tuple


def parsedate(data):
    """Convert a time string to a time tuple."""
    t = parsedate_tz(data)
    if isinstance(t, TupleType):
        return t[:9]
    else:
        return t


def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        t = time.mktime(data[:8] + (0,))
        return t - data[9] - time.timezone


def quote(str):
    """Add quotes around a string."""
    return str.replace('\\', '\\\\').replace('"', '\\"')


class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address."""
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = False
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist

class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""
    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def __str__(self):
        return COMMASPACE.join(map(dump_address_pair, self.addresslist))

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if not x in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
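
A rough usage sketch of the helpers defined above, assuming the new module
is importable as email._parseaddr; the header values below are made up for
illustration:

    from email import _parseaddr

    # Parse an RFC 2822 date with a numeric zone into a 10-tuple, then
    # turn it into a UTC timestamp.
    t = _parseaddr.parsedate_tz('Fri, 21 Mar 2003 13:09:34 -0800')
    if t is not None:
        stamp = _parseaddr.mktime_tz(t)    # seconds since the epoch, UTC

    # Parse an address header into (realname, addrspec) pairs.
    al = _parseaddr.AddressList(
        'Barry Warsaw <bwarsaw@example.com>, webmaster@example.com')
    for realname, addrspec in al:
        print realname, addrspec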

Index: Charset.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Charset.py,v
retrieving revision 1.7.2.3
retrieving revision 1.7.2.4
diff -C2 -d -r1.7.2.3 -r1.7.2.4
*** Charset.py	14 Oct 2002 17:26:00 -0000	1.7.2.3
--- Charset.py	21 Mar 2003 21:09:31 -0000	1.7.2.4
***************
*** 36,39 ****
--- 36,53 ----
      'iso-8859-1':  (QP,        QP,      None),
      'iso-8859-2':  (QP,        QP,      None),
+     'iso-8859-3':  (QP,        QP,      None),
+     'iso-8859-4':  (QP,        QP,      None),
+     # iso-8859-5 is Cyrillic, and not especially used
+     # iso-8859-6 is Arabic, also not particularly used
+     # iso-8859-7 is Greek, QP will not make it readable
+     # iso-8859-8 is Hebrew, QP will not make it readable
+     'iso-8859-9':  (QP,        QP,      None),
+     'iso-8859-10': (QP,        QP,      None),
+     # iso-8859-11 is Thai, QP will not make it readable
+     'iso-8859-13': (QP,        QP,      None),
+     'iso-8859-14': (QP,        QP,      None),
+     'iso-8859-15': (QP,        QP,      None),
+     'windows-1252':(QP,        QP,      None),
+     'viscii':      (QP,        QP,      None),
      'us-ascii':    (None,      None,    None),
      'big5':        (BASE64,    BASE64,  None),
***************
*** 53,56 ****
--- 67,89 ----
      'latin_1': 'iso-8859-1',
      'latin-1': 'iso-8859-1',
+     'latin_2': 'iso-8859-2',
+     'latin-2': 'iso-8859-2',
+     'latin_3': 'iso-8859-3',
+     'latin-3': 'iso-8859-3',
+     'latin_4': 'iso-8859-4',
+     'latin-4': 'iso-8859-4',
+     'latin_5': 'iso-8859-9',
+     'latin-5': 'iso-8859-9',
+     'latin_6': 'iso-8859-10',
+     'latin-6': 'iso-8859-10',
+     'latin_7': 'iso-8859-13',
+     'latin-7': 'iso-8859-13',
+     'latin_8': 'iso-8859-14',
+     'latin-8': 'iso-8859-14',
+     'latin_9': 'iso-8859-15',
+     'latin-9': 'iso-8859-15',
+     'cp949':   'ks_c_5601-1987',
+     'euc_jp':  'euc-jp',
+     'euc_kr':  'euc-kr',
      'ascii':   'us-ascii',
      }
***************
*** 70,73 ****
--- 103,110 ----
      'iso-2022-jp': 'japanese.iso-2022-jp',
      'shift_jis':   'japanese.shift_jis',
+     'euc-kr':      'korean.euc-kr',
+     'ks_c_5601-1987': 'korean.cp949',
+     'iso-2022-kr': 'korean.iso-2022-kr',
+     'johab':       'korean.johab',
      'gb2132':      'eucgb2312_cn',
      'big5':        'big5_tw',
***************
*** 198,201 ****
--- 235,240 ----
          return self.input_charset.lower()
  
+     __repr__ = __str__
+ 
      def __eq__(self, other):
          return str(self) == str(other).lower()
***************
*** 322,326 ****
              return email.base64MIME.header_encode(s, cset)
          elif self.header_encoding == QP:
!             return email.quopriMIME.header_encode(s, cset)
          elif self.header_encoding == SHORTEST:
              lenb64 = email.base64MIME.base64_len(s)
--- 361,365 ----
              return email.base64MIME.header_encode(s, cset)
          elif self.header_encoding == QP:
!             return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
          elif self.header_encoding == SHORTEST:
              lenb64 = email.base64MIME.base64_len(s)
***************
*** 329,333 ****
                  return email.base64MIME.header_encode(s, cset)
              else:
!                 return email.quopriMIME.header_encode(s, cset)
          else:
              return s
--- 368,372 ----
                  return email.base64MIME.header_encode(s, cset)
              else:
!                 return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
          else:
              return s
***************
*** 349,353 ****
          if self.body_encoding is BASE64:
              return email.base64MIME.body_encode(s)
!         elif self.header_encoding is QP:
              return email.quopriMIME.body_encode(s)
          else:
--- 388,392 ----
          if self.body_encoding is BASE64:
              return email.base64MIME.body_encode(s)
!         elif self.body_encoding is QP:
              return email.quopriMIME.body_encode(s)
          else:
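
A rough sketch of what the new alias table entries buy, assuming the
email.Charset module from this backport is on the path; the charset names
are taken from the table above:

    from email.Charset import Charset

    # 'latin-2' is now an alias for iso-8859-2, so both spellings resolve
    # to the same canonical name and the same quoted-printable encodings.
    c = Charset('latin-2')
    print str(c)                       # -> 'iso-8859-2'
    print c == Charset('iso-8859-2')   # -> a true value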

Index: Generator.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Generator.py,v
retrieving revision 1.6.10.3
retrieving revision 1.6.10.4
diff -C2 -d -r1.6.10.3 -r1.6.10.4
*** Generator.py	14 Oct 2002 17:26:01 -0000	1.6.10.3
--- Generator.py	21 Mar 2003 21:09:31 -0000	1.6.10.4
***************
*** 5,10 ****
  """
  
- import time
  import re
  import random
  
--- 5,11 ----
  """
  
  import re
+ import time
+ import locale
  import random
  
***************
*** 13,16 ****
--- 14,18 ----
  
  from email.Header import Header
+ from email.Parser import NLCRE
  
  try:
***************
*** 160,201 ****
      def _write_headers(self, msg):
          for h, v in msg.items():
!             # RFC 2822 says that lines SHOULD be no more than maxheaderlen
!             # characters wide, so we're well within our rights to split long
!             # headers.
!             text = '%s: %s' % (h, v)
!             if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
!                 text = self._split_header(text)
!             print >> self._fp, text
          # A blank line always separates headers from body
          print >> self._fp
  
-     def _split_header(self, text):
-         maxheaderlen = self.__maxheaderlen
-         # Find out whether any lines in the header are really longer than
-         # maxheaderlen characters wide.  There could be continuation lines
-         # that actually shorten it.  Also, replace hard tabs with 8 spaces.
-         lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
-         for line in lines:
-             if len(line) > maxheaderlen:
-                 break
-         else:
-             # No line was actually longer than maxheaderlen characters, so
-             # just return the original unchanged.
-             return text
-         # If we have raw 8bit data in a byte string, we have no idea what the
-         # encoding is.  I think there is no safe way to split this string.  If
-         # it's ascii-subset, then we could do a normal ascii split, but if
-         # it's multibyte then we could break the string.  There's no way to
-         # know so the least harm seems to be to not split the string and risk
-         # it being too long.
-         if _is8bitstring(text):
-             return text
-         # The `text' argument already has the field name prepended, so don't
-         # provide it here or the first line will get folded too short.
-         h = Header(text, maxlinelen=maxheaderlen,
-                    # For backwards compatibility, we use a hard tab here
-                    continuation_ws='\t')
-         return h.encode()
- 
      #
      # Handlers for writing types and subtypes
--- 162,188 ----
      def _write_headers(self, msg):
          for h, v in msg.items():
!             print >> self._fp, '%s:' % h,
!             if self.__maxheaderlen == 0:
!                 # Explicit no-wrapping
!                 print >> self._fp, v
!             elif isinstance(v, Header):
!                 # Header instances know what to do
!                 print >> self._fp, v.encode()
!             elif _is8bitstring(v):
!                 # If we have raw 8bit data in a byte string, we have no idea
!                 # what the encoding is.  There is no safe way to split this
!                 # string.  If it's ascii-subset, then we could do a normal
!                 # ascii split, but if it's multibyte then we could break the
!                 # string.  There's no way to know so the least harm seems to
!                 # be to not split the string and risk it being too long.
!                 print >> self._fp, v
!             else:
!                 # Header's got lots of smarts, so use it.
!                 print >> self._fp, Header(
!                     v, maxlinelen=self.__maxheaderlen,
!                     header_name=h, continuation_ws='\t').encode()
          # A blank line always separates headers from body
          print >> self._fp
  
      #
      # Handlers for writing types and subtypes
***************
*** 259,262 ****
--- 246,257 ----
          if msg.preamble is not None:
              self._fp.write(msg.preamble)
+             # If preamble is the empty string, the length of the split will be
+             # 1, but the last element will be the empty string.  If it's
+             # anything else but does not end in a line separator, the length
+             # will be > 1 and not end in an empty string.  We need to
+             # guarantee a newline after the preamble, but don't add too many.
+             plines = NLCRE.split(msg.preamble)
+             if plines <> [''] and plines[-1] <> '':
+                 self._fp.write('\n')
          # First boundary is a bit different; it doesn't have a leading extra
          # newline.
***************
*** 365,369 ****
      # Craft a random boundary.  If text is given, ensure that the chosen
      # boundary doesn't appear in the text.
!     boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
      if text is None:
          return boundary
--- 360,365 ----
      # Craft a random boundary.  If text is given, ensure that the chosen
      # boundary doesn't appear in the text.
!     dp = locale.localeconv().get('decimal_point', '.')
!     boundary = ('=' * 15) + repr(random.random()).split(dp)[1] + '=='
      if text is None:
          return boundary
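
The boundary change above guards against locales whose decimal point is
not '.'.  A standalone sketch of the same idea (make_boundary_sketch is an
illustrative name, not the module's actual helper):

    import locale
    import random

    def make_boundary_sketch():
        # On some platforms the C library formats floats with the locale's
        # decimal point, so ask localeconv() for it instead of hard-coding
        # '.' when picking off the fractional digits.
        dp = locale.localeconv().get('decimal_point', '.')
        return ('=' * 15) + repr(random.random()).split(dp)[1] + '=='

    print make_boundary_sketch()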

Index: Header.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Header.py,v
retrieving revision 1.13.2.2
retrieving revision 1.13.2.3
diff -C2 -d -r1.13.2.2 -r1.13.2.3
*** Header.py	14 Oct 2002 17:26:02 -0000	1.13.2.2
--- Header.py	21 Mar 2003 21:09:31 -0000	1.13.2.3
***************
*** 5,12 ****
--- 5,14 ----
  
  import re
+ import binascii
  from types import StringType, UnicodeType
  
  import email.quopriMIME
  import email.base64MIME
+ from email.Errors import HeaderParseError
  from email.Charset import Charset
  
***************
*** 26,31 ****
--- 28,36 ----
  CRLF = '\r\n'
  NL = '\n'
+ SPACE = ' '
+ USPACE = u' '
  SPACE8 = ' ' * 8
  EMPTYSTRING = ''
+ UEMPTYSTRING = u''
  
  MAXLINELEN = 76
***************
*** 48,51 ****
--- 53,63 ----
    ''', re.VERBOSE | re.IGNORECASE)
  
+ pcre = re.compile('([,;])')
+ 
+ # Field name regexp, including trailing colon, but not separating whitespace,
+ # according to RFC 2822.  Character range is from tilde to exclamation mark.
+ # For use with .match()
+ fcre = re.compile(r'[\041-\176]+:$')
+ 
  
  
***************
*** 62,65 ****
--- 74,80 ----
      header, otherwise a lower-case string containing the name of the character
      set specified in the encoded string.
+ 
+     An email.Errors.HeaderParseError may be raised when certain decoding error
+     occurs (e.g. a base64 decoding exception).
      """
      # If no encoding, just return the header
***************
*** 80,84 ****
                  # Should we continue a long line?
                  if decoded and decoded[-1][1] is None:
!                     decoded[-1] = (decoded[-1][0] + dec, None)
                  else:
                      decoded.append((unenc, None))
--- 95,99 ----
                  # Should we continue a long line?
                  if decoded and decoded[-1][1] is None:
!                     decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
                  else:
                      decoded.append((unenc, None))
***************
*** 86,95 ****
                  charset, encoding = [s.lower() for s in parts[0:2]]
                  encoded = parts[2]
!                 dec = ''
                  if encoding == 'q':
                      dec = email.quopriMIME.header_decode(encoded)
                  elif encoding == 'b':
!                     dec = email.base64MIME.decode(encoded)
!                 else:
                      dec = encoded
  
--- 101,116 ----
                  charset, encoding = [s.lower() for s in parts[0:2]]
                  encoded = parts[2]
!                 dec = None
                  if encoding == 'q':
                      dec = email.quopriMIME.header_decode(encoded)
                  elif encoding == 'b':
!                     try:
!                         dec = email.base64MIME.decode(encoded)
!                     except binascii.Error:
!                         # Turn this into a higher level exception.  BAW: Right
!                         # now we throw the lower level exception away but
!                         # when/if we get exception chaining, we'll preserve it.
!                         raise HeaderParseError
!                 if dec is None:
                      dec = encoded
  
***************
*** 127,132 ****
  
  class Header:
!     def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
!                  continuation_ws=' '):
          """Create a MIME-compliant header that can contain many character sets.
  
--- 148,154 ----
  
  class Header:
!     def __init__(self, s=None, charset=None,
!                  maxlinelen=None, header_name=None,
!                  continuation_ws=' ', errors='strict'):
          """Create a MIME-compliant header that can contain many character sets.
  
***************
*** 151,154 ****
--- 173,178 ----
          either a space or a hard tab) which will be prepended to continuation
          lines.
+ 
+         errors is passed through to the .append() call.
          """
          if charset is None:
***************
*** 162,166 ****
          self._chunks = []
          if s is not None:
!             self.append(s, charset)
          if maxlinelen is None:
              maxlinelen = MAXLINELEN
--- 186,190 ----
          self._chunks = []
          if s is not None:
!             self.append(s, charset, errors)
          if maxlinelen is None:
              maxlinelen = MAXLINELEN
***************
*** 183,189 ****
      def __unicode__(self):
          """Helper for the built-in unicode function."""
!         # charset item is a Charset instance so we need to stringify it.
!         uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
!         return u''.join(uchunks)
  
      # Rich comparison operators for equality only.  BAW: does it make sense to
--- 207,228 ----
      def __unicode__(self):
          """Helper for the built-in unicode function."""
!         uchunks = []
!         lastcs = None
!         for s, charset in self._chunks:
!             # We must preserve spaces between encoded and non-encoded word
!             # boundaries, which means for us we need to add a space when we go
!             # from a charset to None/us-ascii, or from None/us-ascii to a
!             # charset.  Only do this for the second and subsequent chunks.
!             nextcs = charset
!             if uchunks:
!                 if lastcs is not None:
!                     if nextcs is None or nextcs == 'us-ascii':
!                         uchunks.append(USPACE)
!                         nextcs = None
!                 elif nextcs is not None and nextcs <> 'us-ascii':
!                     uchunks.append(USPACE)
!             lastcs = nextcs
!             uchunks.append(unicode(s, str(charset)))
!         return UEMPTYSTRING.join(uchunks)
  
      # Rich comparison operators for equality only.  BAW: does it make sense to
***************
*** 197,201 ****
          return not self == other
  
!     def append(self, s, charset=None):
          """Append a string to the MIME header.
  
--- 236,240 ----
          return not self == other
  
!     def append(self, s, charset=None, errors='strict'):
          """Append a string to the MIME header.
  
***************
*** 214,217 ****
--- 253,259 ----
          following charsets in order: us-ascii, the charset hint, utf-8.  The
          first character set not to provoke a UnicodeError is used.
+ 
+         Optional `errors' is passed as the third argument to any unicode() or
+         ustr.encode() call.
          """
          if charset is None:
***************
*** 228,237 ****
                  # converted to a unicode with the input codec of the charset.
                  incodec = charset.input_codec or 'us-ascii'
!                 ustr = unicode(s, incodec)
                  # Now make sure that the unicode could be converted back to a
                  # byte string with the output codec, which may be different
                  # than the iput coded.  Still, use the original byte string.
                  outcodec = charset.output_codec or 'us-ascii'
!                 ustr.encode(outcodec)
              elif isinstance(s, UnicodeType):
                  # Now we have to be sure the unicode string can be converted
--- 270,279 ----
                  # converted to a unicode with the input codec of the charset.
                  incodec = charset.input_codec or 'us-ascii'
!                 ustr = unicode(s, incodec, errors)
                  # Now make sure that the unicode could be converted back to a
                  # byte string with the output codec, which may be different
                  # than the iput coded.  Still, use the original byte string.
                  outcodec = charset.output_codec or 'us-ascii'
!                 ustr.encode(outcodec, errors)
              elif isinstance(s, UnicodeType):
                  # Now we have to be sure the unicode string can be converted
***************
*** 241,245 ****
                      try:
                          outcodec = charset.output_codec or 'us-ascii'
!                         s = s.encode(outcodec)
                          break
                      except UnicodeError:
--- 283,287 ----
                      try:
                          outcodec = charset.output_codec or 'us-ascii'
!                         s = s.encode(outcodec, errors)
                          break
                      except UnicodeError:
***************
*** 249,259 ****
          self._chunks.append((s, charset))
  
!     def _split(self, s, charset, firstline=False):
          # Split up a header safely for use with encode_chunks.
          splittable = charset.to_splittable(s)
!         encoded = charset.from_splittable(splittable)
          elen = charset.encoded_header_len(encoded)
! 
!         if elen <= self._maxlinelen:
              return [(encoded, charset)]
          # If we have undetermined raw 8bit characters sitting in a byte
--- 291,301 ----
          self._chunks.append((s, charset))
  
!     def _split(self, s, charset, maxlinelen, splitchars):
          # Split up a header safely for use with encode_chunks.
          splittable = charset.to_splittable(s)
!         encoded = charset.from_splittable(splittable, True)
          elen = charset.encoded_header_len(encoded)
!         # If the line's encoded length first, just return it
!         if elen <= maxlinelen:
              return [(encoded, charset)]
          # If we have undetermined raw 8bit characters sitting in a byte
***************
*** 263,267 ****
          # be to not split the header at all, but that means they could go out
          # longer than maxlinelen.
!         elif charset == '8bit':
              return [(s, charset)]
          # BAW: I'm not sure what the right test here is.  What we're trying to
--- 305,309 ----
          # be to not split the header at all, but that means they could go out
          # longer than maxlinelen.
!         if charset == '8bit':
              return [(s, charset)]
          # BAW: I'm not sure what the right test here is.  What we're trying to
***************
*** 276,374 ****
          # although it's possible that other charsets may also benefit from the
          # higher-level syntactic breaks.
-         #
          elif charset == 'us-ascii':
!             return self._ascii_split(s, charset, firstline)
          # BAW: should we use encoded?
          elif elen == len(s):
              # We can split on _maxlinelen boundaries because we know that the
              # encoding won't change the size of the string
!             splitpnt = self._maxlinelen
              first = charset.from_splittable(splittable[:splitpnt], False)
              last = charset.from_splittable(splittable[splitpnt:], False)
          else:
!             # Divide and conquer.
!             halfway = _floordiv(len(splittable), 2)
!             first = charset.from_splittable(splittable[:halfway], False)
!             last = charset.from_splittable(splittable[halfway:], False)
!         # Do the split
!         return self._split(first, charset, firstline) + \
!                self._split(last, charset)
  
!     def _ascii_split(self, s, charset, firstline):
!         # Attempt to split the line at the highest-level syntactic break
!         # possible.  Note that we don't have a lot of smarts about field
!         # syntax; we just try to break on semi-colons, then whitespace.
!         rtn = []
!         lines = s.splitlines()
!         while lines:
!             line = lines.pop(0)
!             if firstline:
!                 maxlinelen = self._firstlinelen
!                 firstline = False
!             else:
!                 #line = line.lstrip()
!                 maxlinelen = self._maxlinelen
!             # Short lines can remain unchanged
!             if len(line.replace('\t', SPACE8)) <= maxlinelen:
!                 rtn.append(line)
!             else:
!                 oldlen = len(line)
!                 # Try to break the line on semicolons, but if that doesn't
!                 # work, try to split on folding whitespace.
!                 while len(line) > maxlinelen:
!                     i = line.rfind(';', 0, maxlinelen)
!                     if i < 0:
!                         break
!                     rtn.append(line[:i] + ';')
!                     line = line[i+1:]
!                 # Is the remaining stuff still longer than maxlinelen?
!                 if len(line) <= maxlinelen:
!                     # Splitting on semis worked
!                     rtn.append(line)
!                     continue
!                 # Splitting on semis didn't finish the job.  If it did any
!                 # work at all, stick the remaining junk on the front of the
!                 # `lines' sequence and let the next pass do its thing.
!                 if len(line) <> oldlen:
!                     lines.insert(0, line)
!                     continue
!                 # Otherwise, splitting on semis didn't help at all.
!                 parts = re.split(r'(\s+)', line)
!                 if len(parts) == 1 or (len(parts) == 3 and
!                                        parts[0].endswith(':')):
!                     # This line can't be split on whitespace.  There's now
!                     # little we can do to get this into maxlinelen.  BAW:
!                     # We're still potentially breaking the RFC by possibly
!                     # allowing lines longer than the absolute maximum of 998
!                     # characters.  For now, let it slide.
!                     #
!                     # len(parts) will be 1 if this line has no `Field: '
!                     # prefix, otherwise it will be len(3).
!                     rtn.append(line)
!                     continue
!                 # There is whitespace we can split on.
!                 first = parts.pop(0)
!                 sublines = [first]
!                 acc = len(first)
!                 while parts:
!                     len0 = len(parts[0])
!                     len1 = len(parts[1])
!                     if acc + len0 + len1 <= maxlinelen:
!                         sublines.append(parts.pop(0))
!                         sublines.append(parts.pop(0))
!                         acc += len0 + len1
!                     else:
!                         # Split it here, but don't forget to ignore the
!                         # next whitespace-only part
!                         if first <> '':
!                             rtn.append(EMPTYSTRING.join(sublines))
!                         del parts[0]
!                         first = parts.pop(0)
!                         sublines = [first]
!                         acc = len(first)
!                 rtn.append(EMPTYSTRING.join(sublines))
!         return [(chunk, charset) for chunk in rtn]
  
!     def _encode_chunks(self, newchunks):
          # MIME-encode a header with many different charsets and/or encodings.
          #
--- 318,346 ----
          # although it's possible that other charsets may also benefit from the
          # higher-level syntactic breaks.
          elif charset == 'us-ascii':
!             return self._split_ascii(s, charset, maxlinelen, splitchars)
          # BAW: should we use encoded?
          elif elen == len(s):
              # We can split on _maxlinelen boundaries because we know that the
              # encoding won't change the size of the string
!             splitpnt = maxlinelen
              first = charset.from_splittable(splittable[:splitpnt], False)
              last = charset.from_splittable(splittable[splitpnt:], False)
          else:
!             # Binary search for split point
!             first, last = _binsplit(splittable, charset, maxlinelen)
!         # first is of the proper length so just wrap it in the appropriate
!         # chrome.  last must be recursively split.
!         fsplittable = charset.to_splittable(first)
!         fencoded = charset.from_splittable(fsplittable, True)
!         chunk = [(fencoded, charset)]
!         return chunk + self._split(last, charset, self._maxlinelen, splitchars)
  
!     def _split_ascii(self, s, charset, firstlen, splitchars):
!         chunks = _split_ascii(s, firstlen, self._maxlinelen,
!                               self._continuation_ws, splitchars)
!         return zip(chunks, [charset]*len(chunks))
  
!     def _encode_chunks(self, newchunks, maxlinelen):
          # MIME-encode a header with many different charsets and/or encodings.
          #
***************
*** 388,404 ****
          # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
          #  =?charset2?b?SvxyZ2VuIEL2aW5n?="
-         #
          chunks = []
          for header, charset in newchunks:
              if charset is None or charset.header_encoding is None:
!                 # There's no encoding for this chunk's charsets
!                 _max_append(chunks, header, self._maxlinelen)
              else:
!                 _max_append(chunks, charset.header_encode(header),
!                             self._maxlinelen, ' ')
          joiner = NL + self._continuation_ws
          return joiner.join(chunks)
  
!     def encode(self):
          """Encode a message header into an RFC-compliant format.
  
--- 360,381 ----
          # =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
          #  =?charset2?b?SvxyZ2VuIEL2aW5n?="
          chunks = []
          for header, charset in newchunks:
+             if not header:
+                 continue
              if charset is None or charset.header_encoding is None:
!                 s = header
              else:
!                 s = charset.header_encode(header)
!             # Don't add more folding whitespace than necessary
!             if chunks and chunks[-1].endswith(' '):
!                 extra = ''
!             else:
!                 extra = ' '
!             _max_append(chunks, s, maxlinelen, extra)
          joiner = NL + self._continuation_ws
          return joiner.join(chunks)
  
!     def encode(self, splitchars=';, '):
          """Encode a message header into an RFC-compliant format.
  
***************
*** 417,423 ****
          If the given charset is not known or an error occurs during
          conversion, this function will return the header untouched.
          """
          newchunks = []
          for s, charset in self._chunks:
!             newchunks += self._split(s, charset, True)
!         return self._encode_chunks(newchunks)
--- 394,515 ----
          If the given charset is not known or an error occurs during
          conversion, this function will return the header untouched.
+ 
+         Optional splitchars is a string containing characters to split long
+         ASCII lines on, in rough support of RFC 2822's `highest level
+         syntactic breaks'.  This doesn't affect RFC 2047 encoded lines.
          """
          newchunks = []
+         maxlinelen = self._firstlinelen
+         lastlen = 0
          for s, charset in self._chunks:
!             # The first bit of the next chunk should be just long enough to
!             # fill the next line.  Don't forget the space separating the
!             # encoded words.
!             targetlen = maxlinelen - lastlen - 1
!             if targetlen < charset.encoded_header_len(''):
!                 # Stick it on the next line
!                 targetlen = maxlinelen
!             newchunks += self._split(s, charset, targetlen, splitchars)
!             lastchunk, lastcharset = newchunks[-1]
!             lastlen = lastcharset.encoded_header_len(lastchunk)
!         return self._encode_chunks(newchunks, maxlinelen)
! 
! 
! 
! def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
!     lines = []
!     maxlen = firstlen
!     for line in s.splitlines():
!         # Ignore any leading whitespace (i.e. continuation whitespace) already
!         # on the line, since we'll be adding our own.
!         line = line.lstrip()
!         if len(line) < maxlen:
!             lines.append(line)
!             maxlen = restlen
!             continue
!         # Attempt to split the line at the highest-level syntactic break
!         # possible.  Note that we don't have a lot of smarts about field
!         # syntax; we just try to break on semi-colons, then commas, then
!         # whitespace.
!         for ch in splitchars:
!             if line.find(ch) >= 0:
!                 break
!         else:
!             # There's nothing useful to split the line on, not even spaces, so
!             # just append this line unchanged
!             lines.append(line)
!             maxlen = restlen
!             continue
!         # Now split the line on the character plus trailing whitespace
!         cre = re.compile(r'%s\s*' % ch)
!         if ch in ';,':
!             eol = ch
!         else:
!             eol = ''
!         joiner = eol + ' '
!         joinlen = len(joiner)
!         wslen = len(continuation_ws.replace('\t', SPACE8))
!         this = []
!         linelen = 0
!         for part in cre.split(line):
!             curlen = linelen + max(0, len(this)-1) * joinlen
!             partlen = len(part)
!             onfirstline = not lines
!             # We don't want to split after the field name, if we're on the
!             # first line and the field name is present in the header string.
!             if ch == ' ' and onfirstline and \
!                    len(this) == 1 and fcre.match(this[0]):
!                 this.append(part)
!                 linelen += partlen
!             elif curlen + partlen > maxlen:
!                 if this:
!                     lines.append(joiner.join(this) + eol)
!                 # If this part is longer than maxlen and we aren't already
!                 # splitting on whitespace, try to recursively split this line
!                 # on whitespace.
!                 if partlen > maxlen and ch <> ' ':
!                     subl = _split_ascii(part, maxlen, restlen,
!                                         continuation_ws, ' ')
!                     lines.extend(subl[:-1])
!                     this = [subl[-1]]
!                 else:
!                     this = [part]
!                 linelen = wslen + len(this[-1])
!                 maxlen = restlen
!             else:
!                 this.append(part)
!                 linelen += partlen
!         # Put any left over parts on a line by themselves
!         if this:
!             lines.append(joiner.join(this))
!     return lines
! 
! 
! 
! def _binsplit(splittable, charset, maxlinelen):
!     i = 0
!     j = len(splittable)
!     while i < j:
!         # Invariants:
!         # 1. splittable[:k] fits for all k <= i (note that we *assume*,
!         #    at the start, that splittable[:0] fits).
!         # 2. splittable[:k] does not fit for any k > j (at the start,
!         #    this means we shouldn't look at any k > len(splittable)).
!         # 3. We don't know about splittable[:k] for k in i+1..j.
!         # 4. We want to set i to the largest k that fits, with i <= k <= j.
!         #
!         m = (i+j+1) >> 1  # ceiling((i+j)/2); i < m <= j
!         chunk = charset.from_splittable(splittable[:m], True)
!         chunklen = charset.encoded_header_len(chunk)
!         if chunklen <= maxlinelen:
!             # m is acceptable, so is a new lower bound.
!             i = m
!         else:
!             # m is not acceptable, so final i must be < m.
!             j = m - 1
!     # i == j.  Invariant #1 implies that splittable[:i] fits, and
!     # invariant #2 implies that splittable[:i+1] does not fit, so i
!     # is what we're looking for.
!     first = charset.from_splittable(splittable[:i], False)
!     last  = charset.from_splittable(splittable[i:], False)
!     return first, last
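
A rough usage sketch of the reworked folding, assuming the email.Header
module from this backport; the subject text and address are made up:

    from email.Header import Header, decode_header

    # Long us-ascii values are folded by encode(), which now prefers to
    # break on ';' then ',' then whitespace (the default splitchars).
    h = Header('A rather long subject that will not fit on one line; '
               'it has a semicolon and, for good measure, some commas',
               header_name='Subject')
    print h.encode()

    # decode_header() still returns (decoded_string, charset) pairs, and
    # now raises HeaderParseError on bogus base64 encoded-words.
    print decode_header('=?iso-8859-1?q?Andr=E9?= <andre@example.com>')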

Index: MIMEText.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/MIMEText.py,v
retrieving revision 1.3.10.1
retrieving revision 1.3.10.2
diff -C2 -d -r1.3.10.1 -r1.3.10.2
*** MIMEText.py	4 Oct 2002 17:24:24 -0000	1.3.10.1
--- MIMEText.py	21 Mar 2003 21:09:31 -0000	1.3.10.2
***************
*** 18,23 ****
          """Create a text/* type MIME document.
  
!         _text is the string for this message object.  If the text does not end
!         in a newline, one is added.
  
          _subtype is the MIME sub content type, defaulting to "plain".
--- 18,22 ----
          """Create a text/* type MIME document.
  
!         _text is the string for this message object.
  
          _subtype is the MIME sub content type, defaulting to "plain".
***************
*** 36,41 ****
          MIMENonMultipart.__init__(self, 'text', _subtype,
                                    **{'charset': _charset})
-         if _text and not _text.endswith('\n'):
-             _text += '\n'
          self.set_payload(_text, _charset)
          if _encoder is not None:
--- 35,38 ----
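
A small sketch of the behavior change above, assuming email.MIMEText from
this backport:

    from email.MIMEText import MIMEText

    # The payload is now stored exactly as given; no trailing newline is
    # appended behind the caller's back.
    msg = MIMEText('hello, world')
    print repr(msg.get_payload())    # 'hello, world', with no added '\n'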

Index: Message.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Message.py,v
retrieving revision 1.9.6.2
retrieving revision 1.9.6.3
diff -C2 -d -r1.9.6.2 -r1.9.6.3
*** Message.py	10 Oct 2002 19:09:24 -0000	1.9.6.2
--- Message.py	21 Mar 2003 21:09:31 -0000	1.9.6.3
***************
*** 6,9 ****
--- 6,11 ----
  
  import re
+ import uu
+ import binascii
  import warnings
  from cStringIO import StringIO
***************
*** 11,16 ****
  
  # Intrapackage imports
- from email import Errors
  from email import Utils
  from email import Charset
  
--- 13,18 ----
  
  # Intrapackage imports
  from email import Utils
+ from email import Errors
  from email import Charset
  
***************
*** 165,176 ****
          i returns that index into the payload.
  
!         Optional decode is a flag (defaulting to False) indicating whether the
!         payload should be decoded or not, according to the
!         Content-Transfer-Encoding header.  When True and the message is not a
!         multipart, the payload will be decoded if this header's value is
!         `quoted-printable' or `base64'.  If some other encoding is used, or
!         the header is missing, the payload is returned as-is (undecoded).  If
!         the message is a multipart and the decode flag is True, then None is
!         returned.
          """
          if i is None:
--- 167,182 ----
          i returns that index into the payload.
  
!         Optional decode is a flag indicating whether the payload should be
!         decoded or not, according to the Content-Transfer-Encoding header
!         (default is False).
! 
!         When True and the message is not a multipart, the payload will be
!         decoded if this header's value is `quoted-printable' or `base64'.  If
!         some other encoding is used, or the header is missing, or if the
!         payload has bogus data (i.e. bogus base64 or uuencoded data), the
!         payload is returned as-is.
! 
!         If the message is a multipart and the decode flag is True, then None
!         is returned.
          """
          if i is None:
***************
*** 183,191 ****
              if self.is_multipart():
                  return None
!             cte = self.get('content-transfer-encoding', '')
!             if cte.lower() == 'quoted-printable':
                  return Utils._qdecode(payload)
!             elif cte.lower() == 'base64':
!                 return Utils._bdecode(payload)
          # Everything else, including encodings with 8bit or 7bit are returned
          # unchanged.
--- 189,209 ----
              if self.is_multipart():
                  return None
!             cte = self.get('content-transfer-encoding', '').lower()
!             if cte == 'quoted-printable':
                  return Utils._qdecode(payload)
!             elif cte == 'base64':
!                 try:
!                     return Utils._bdecode(payload)
!                 except binascii.Error:
!                     # Incorrect padding
!                     return payload
!             elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
!                 sfp = StringIO()
!                 try:
!                     uu.decode(StringIO(payload+'\n'), sfp)
!                     payload = sfp.getvalue()
!                 except uu.Error:
!                     # Some decoding problem
!                     return payload
          # Everything else, including encodings with 8bit or 7bit are returned
          # unchanged.
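
A rough sketch of the decoding behavior above, using a made-up message;
bogus base64 and the x-uuencode family now fall back to returning the
payload as-is instead of raising:

    from email import message_from_string

    text = ("Content-Type: text/plain\n"
            "Content-Transfer-Encoding: base64\n"
            "\n"
            "aGVsbG8gd29ybGQ=\n")
    msg = message_from_string(text)
    print msg.get_payload(decode=True)    # 'hello world'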

Index: Parser.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Parser.py,v
retrieving revision 1.5.10.3
retrieving revision 1.5.10.4
diff -C2 -d -r1.5.10.3 -r1.5.10.4
*** Parser.py	7 Oct 2002 17:02:40 -0000	1.5.10.3
--- Parser.py	21 Mar 2003 21:09:31 -0000	1.5.10.4
***************
*** 21,25 ****
      False = 0
  
! nlcre = re.compile('\r\n|\r|\n')
  
  
--- 21,25 ----
      False = 0
  
! NLCRE = re.compile('\r\n|\r|\n')
  
  
***************
*** 60,66 ****
          """
          root = self._class()
!         self._parseheaders(root, fp)
          if not headersonly:
!             self._parsebody(root, fp)
          return root
  
--- 60,66 ----
          """
          root = self._class()
!         firstbodyline = self._parseheaders(root, fp)
          if not headersonly:
!             self._parsebody(root, fp, firstbodyline)
          return root
  
***************
*** 81,84 ****
--- 81,85 ----
          lastvalue = []
          lineno = 0
+         firstbodyline = None
          while True:
              # Don't strip the line before we test for the end condition,
***************
*** 121,131 ****
                  if self._strict:
                      raise Errors.HeaderParseError(
!                         "Not a header, not a continuation: ``%s''"%line)
                  elif lineno == 1 and line.startswith('--'):
                      # allow through duplicate boundary tags.
                      continue
                  else:
!                     raise Errors.HeaderParseError(
!                         "Not a header, not a continuation: ``%s''"%line)
              if lastheader:
                  container[lastheader] = NL.join(lastvalue)
--- 122,135 ----
                  if self._strict:
                      raise Errors.HeaderParseError(
!                         "Not a header, not a continuation: ``%s''" % line)
                  elif lineno == 1 and line.startswith('--'):
                      # allow through duplicate boundary tags.
                      continue
                  else:
!                     # There was no separating blank line as mandated by RFC
!                     # 2822, but we're in non-strict mode.  So just offer up
!                     # this current line as the first body line.
!                     firstbodyline = line
!                     break
              if lastheader:
                  container[lastheader] = NL.join(lastvalue)
***************
*** 135,140 ****
          if lastheader:
              container[lastheader] = NL.join(lastvalue)
  
!     def _parsebody(self, container, fp):
          # Parse the body, but first split the payload on the content-type
          # boundary if present.
--- 139,145 ----
          if lastheader:
              container[lastheader] = NL.join(lastvalue)
+         return firstbodyline
  
!     def _parsebody(self, container, fp, firstbodyline=None):
          # Parse the body, but first split the payload on the content-type
          # boundary if present.
***************
*** 153,156 ****
--- 158,163 ----
              separator = '--' + boundary
              payload = fp.read()
+             if firstbodyline is not None:
+                 payload = firstbodyline + '\n' + payload
              # We use an RE here because boundaries can have trailing
              # whitespace.
***************
*** 170,174 ****
              # Find out what kind of line endings we're using
              start += len(mo.group('sep')) + len(mo.group('ws'))
!             mo = nlcre.search(payload, start)
              if mo:
                  start += len(mo.group(0))
--- 177,181 ----
              # Find out what kind of line endings we're using
              start += len(mo.group('sep')) + len(mo.group('ws'))
!             mo = NLCRE.search(payload, start)
              if mo:
                  start += len(mo.group(0))
***************
*** 222,228 ****
                          msgobj = self.parsestr(parthdrs, headersonly=1)
                      # while submsgobj is the message itself
-                     submsgobj = self.parsestr(part)
-                     msgobj.attach(submsgobj)
                      msgobj.set_default_type('message/rfc822')
                  else:
                      msgobj = self.parsestr(part)
--- 229,239 ----
                          msgobj = self.parsestr(parthdrs, headersonly=1)
                      # while submsgobj is the message itself
                      msgobj.set_default_type('message/rfc822')
+                     maintype = msgobj.get_content_maintype()
+                     if maintype in ('message', 'multipart'):
+                         submsgobj = self.parsestr(part)
+                         msgobj.attach(submsgobj)
+                     else:
+                         msgobj.set_payload(part)
                  else:
                      msgobj = self.parsestr(part)
***************
*** 257,261 ****
              container.attach(msg)
          else:
!             container.set_payload(fp.read())
  
  
--- 268,275 ----
              container.attach(msg)
          else:
!             text = fp.read()
!             if firstbodyline is not None:
!                 text = firstbodyline + '\n' + text
!             container.set_payload(text)
  
  
***************
*** 271,275 ****
      interested in is the message headers.
      """
!     def _parsebody(self, container, fp):
          # Consume but do not parse, the body
!         container.set_payload(fp.read())
--- 285,292 ----
      interested in is the message headers.
      """
!     def _parsebody(self, container, fp, firstbodyline=None):
          # Consume but do not parse, the body
!         text = fp.read()
!         if firstbodyline is not None:
!             text = firstbodyline + '\n' + text
!         container.set_payload(text)
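
The interesting change in Parser.py is the firstbodyline plumbing: in
non-strict mode a line that is neither a header nor a continuation no longer
raises Errors.HeaderParseError; it simply becomes the first line of the body.
(The other hunk stops attaching a parsed sub-message for message/rfc822 parts
whose own headers say their main type is not message or multipart; such parts
now keep a flat string payload.)  Roughly, for a message that is missing the
RFC 2822 blank line between headers and body (example text invented here):

    from email.Parser import Parser

    text = ("From: aperson@example.com\n"
            "To: bperson@example.com\n"
            "Here is the body, glued straight onto the headers.\n"
            "And a second body line.\n")

    # Assuming the default, non-strict parser.
    msg = Parser().parsestr(text)
    # The offending line is now the first line of the payload instead of
    # triggering a HeaderParseError.
    print msg.get_payload()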

Index: Utils.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Utils.py,v
retrieving revision 1.9.6.1
retrieving revision 1.9.6.2
diff -C2 -d -r1.9.6.1 -r1.9.6.2
*** Utils.py	4 Oct 2002 17:24:24 -0000	1.9.6.1
--- Utils.py	21 Mar 2003 21:09:31 -0000	1.9.6.2
***************
*** 14,24 ****
  from types import ListType
  
! from rfc822 import quote
! from rfc822 import AddressList as _AddressList
! from rfc822 import mktime_tz
  
  # We need workarounds for bugs in these methods in older Pythons (see below)
! from rfc822 import parsedate as _parsedate
! from rfc822 import parsedate_tz as _parsedate_tz
  
  try:
--- 14,24 ----
  from types import ListType
  
! from email._parseaddr import quote
! from email._parseaddr import AddressList as _AddressList
! from email._parseaddr import mktime_tz
  
  # We need workarounds for bugs in these methods in older Pythons (see below)
! from email._parseaddr import parsedate as _parsedate
! from email._parseaddr import parsedate_tz as _parsedate_tz
  
  try:
***************
*** 55,60 ****
  CRLF = '\r\n'
  
! specialsre = re.compile(r'[][\()<>@,:;".]')
! escapesre = re.compile(r'[][\()"]')
  
  
--- 55,60 ----
  CRLF = '\r\n'
  
! specialsre = re.compile(r'[][\\()<>@,:;".]')
! escapesre = re.compile(r'[][\\()"]')
  
  
***************
*** 67,72 ****
  
  def _bdecode(s):
-     if not s:
-         return s
      # We can't quite use base64.encodestring() since it tacks on a "courtesy
      # newline".  Blech!
--- 67,70 ----
***************
*** 281,287 ****
      """Decode string according to RFC 2231"""
      import urllib
!     charset, language, s = s.split("'", 2)
!     s = urllib.unquote(s)
!     return charset, language, s
  
  
--- 279,287 ----
      """Decode string according to RFC 2231"""
      import urllib
!     parts = s.split("'", 2)
!     if len(parts) == 1:
!         return None, None, s
!     charset, language, s = parts
!     return charset, language, urllib.unquote(s)
  
  
***************
*** 336,340 ****
                  value.append(continuation)
              charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
!             new_params.append((name,
!                                (charset, language, '"%s"' % quote(value))))
      return new_params
--- 336,340 ----
                  value.append(continuation)
              charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
!             new_params.append(
!                 (name, (charset, language, '"%s"' % quote(value))))
      return new_params
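
Besides switching the address-parsing imports from rfc822 over to the new
email._parseaddr module (and adding the missing backslash to the specialsre
and escapesre character classes), Utils.py teaches decode_rfc2231() to cope
with a value that carries no charset'language' prefix: instead of a failed
three-way unpack it now returns (None, None, value).  A quick sketch:

    from email import Utils

    # Fully RFC 2231-encoded value: charset, language and encoded text.
    print Utils.decode_rfc2231("us-ascii'en'This%20is%20%2A%2A%2Afun%2A%2A%2A")
    # -> ('us-ascii', 'en', 'This is ***fun***')

    # A bare token used to blow up in the unpacking; it now comes back as-is.
    print Utils.decode_rfc2231("just-a-plain-token")
    # -> (None, None, 'just-a-plain-token')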

Index: __init__.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/__init__.py,v
retrieving revision 1.4.10.4
retrieving revision 1.4.10.5
diff -C2 -d -r1.4.10.4 -r1.4.10.5
*** __init__.py	14 Oct 2002 17:26:02 -0000	1.4.10.4
--- __init__.py	21 Mar 2003 21:09:31 -0000	1.4.10.5
***************
*** 5,9 ****
  """
  
! __version__ = '2.4.3'
  
  __all__ = [
--- 5,9 ----
  """
  
! __version__ = '2.5'
  
  __all__ = [
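
The version bump is also the easiest way to confirm that the backport is the
copy actually being picked up on a 2.2 installation:

    import email
    print email.__version__      # '2.5' after this backport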

Index: _compat21.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/_compat21.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** _compat21.py	4 Oct 2002 17:24:24 -0000	1.4.2.1
--- _compat21.py	21 Mar 2003 21:09:31 -0000	1.4.2.2
***************
*** 8,11 ****
--- 8,14 ----
  from types import StringType, UnicodeType
  
+ False = 0
+ True = 1
+ 
  
  
***************
*** 32,36 ****
  
  def _isstring(obj):
!     return isinstance(obj, StringType) or isinstance(obj, UnicodeType)    
  
  
--- 35,39 ----
  
  def _isstring(obj):
!     return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
  
  
***************
*** 38,46 ****
  # These two functions are imported into the Iterators.py interface module.
  # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg):
!     """Iterate over the parts, returning string payloads line-by-line."""
      lines = []
      for subpart in msg.walk():
!         payload = subpart.get_payload()
          if _isstring(payload):
              for line in StringIO(payload).readlines():
--- 41,52 ----
  # These two functions are imported into the Iterators.py interface module.
  # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg, decode=False):
!     """Iterate over the parts, returning string payloads line-by-line.
! 
!     Optional decode (default False) is passed through to .get_payload().
!     """
      lines = []
      for subpart in msg.walk():
!         payload = subpart.get_payload(decode=decode)
          if _isstring(payload):
              for line in StringIO(payload).readlines():

Index: _compat22.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/_compat22.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** _compat22.py	4 Oct 2002 17:24:24 -0000	1.4.2.1
--- _compat22.py	21 Mar 2003 21:09:31 -0000	1.4.2.2
***************
*** 39,46 ****
  # These two functions are imported into the Iterators.py interface module.
  # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg):
!     """Iterate over the parts, returning string payloads line-by-line."""
      for subpart in msg.walk():
!         payload = subpart.get_payload()
          if _isstring(payload):
              for line in StringIO(payload):
--- 39,49 ----
  # These two functions are imported into the Iterators.py interface module.
  # The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg, decode=False):
!     """Iterate over the parts, returning string payloads line-by-line.
! 
!     Optional decode (default False) is passed through to .get_payload().
!     """
      for subpart in msg.walk():
!         payload = subpart.get_payload(decode=decode)
          if _isstring(payload):
              for line in StringIO(payload):
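
Both compatibility modules grow the same optional decode argument on
body_line_iterator(); it is just passed through to get_payload(), so callers
(via the Iterators.py interface module) can walk over decoded body text
rather than the raw transfer-encoded payload.  For instance, with a small
quoted-printable message (made up for the example):

    import email
    from email.Iterators import body_line_iterator

    text = ("Content-Type: text/plain; charset=iso-8859-1\n"
            "Content-Transfer-Encoding: quoted-printable\n"
            "\n"
            "Tr=E8s bien.\n")

    msg = email.message_from_string(text)

    for line in body_line_iterator(msg):
        print repr(line)              # raw quoted-printable text

    for line in body_line_iterator(msg, decode=True):
        print repr(line)              # '=E8' decoded back to a latin-1 byte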

Index: base64MIME.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/base64MIME.py,v
retrieving revision 1.5.2.1
retrieving revision 1.5.2.2
diff -C2 -d -r1.5.2.1 -r1.5.2.2
*** base64MIME.py	4 Oct 2002 17:24:24 -0000	1.5.2.1
--- base64MIME.py	21 Mar 2003 21:09:31 -0000	1.5.2.2
***************
*** 103,109 ****
      max_unencoded = _floordiv(max_encoded * 3, 4)
  
-     # BAW: Ben's original code used a step of max_unencoded, but I think it
-     # ought to be max_encoded.  Otherwise, where's max_encoded used?  I'm
-     # still not sure what the
      for i in range(0, len(header), max_unencoded):
          base64ed.append(b2a_base64(header[i:i+max_unencoded]))
--- 103,106 ----
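
The deleted comment in base64MIME.py questioned the loop step in
header_encode(); stepping by max_unencoded is what keeps each encoded chunk
near the max_encoded budget, because b2a_base64() turns every 3 input bytes
into 4 output characters (padding can nudge a chunk a character or two over
when max_unencoded is not a multiple of 3).  A rough check of that
arithmetic, with illustrative numbers rather than the function's exact
bookkeeping:

    from binascii import b2a_base64

    max_encoded = 60                        # encoded-chunk budget
    max_unencoded = max_encoded * 3 // 4    # 45 raw bytes per slice (a multiple of 3)

    header = 'x' * 200
    for i in range(0, len(header), max_unencoded):
        chunk = b2a_base64(header[i:i + max_unencoded])[:-1]  # drop the courtesy newline
        assert len(chunk) <= max_encoded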

Index: quopriMIME.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/quopriMIME.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** quopriMIME.py	4 Oct 2002 17:24:24 -0000	1.4.2.1
--- quopriMIME.py	21 Mar 2003 21:09:31 -0000	1.4.2.2
***************
*** 83,87 ****
      if not L:
          L.append(s.lstrip())
!     elif len(L[-1]) + len(s) < maxlen:
          L[-1] += extra + s
      else:
--- 83,87 ----
      if not L:
          L.append(s.lstrip())
!     elif len(L[-1]) + len(s) <= maxlen:
          L[-1] += extra + s
      else:
***************
*** 117,121 ****
  
      with each line wrapped safely at, at most, maxlinelen characters (defaults
!     to 76 characters).
  
      End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
--- 117,122 ----
  
      with each line wrapped safely at, at most, maxlinelen characters (defaults
!     to 76 characters).  If maxlinelen is None, the entire string is encoded in
!     one chunk with no splitting.
  
      End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
***************
*** 135,141 ****
  
      # Quopri encode each line, in encoded chunks no greater than maxlinelen in
!     # lenght, after the RFC chrome is added in.
      quoted = []
!     max_encoded = maxlinelen - len(charset) - MISC_LEN
  
      for c in header:
--- 136,146 ----
  
      # Quopri encode each line, in encoded chunks no greater than maxlinelen in
!     # length, after the RFC chrome is added in.
      quoted = []
!     if maxlinelen is None:
!         # An obnoxiously large number that's good enough
!         max_encoded = 100000
!     else:
!         max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
  
      for c in header: