[Python-checkins] python/dist/src/Lib/email _parseaddr.py,NONE,1.5.2.1 Charset.py,1.7.2.3,1.7.2.4 Generator.py,1.6.10.3,1.6.10.4 Header.py,1.13.2.2,1.13.2.3 MIMEText.py,1.3.10.1,1.3.10.2 Message.py,1.9.6.2,1.9.6.3 Parser.py,1.5.10.3,1.5.10.4 Utils.py,1.9.6.1,1.9.6.2 __init__.py,1.4.10.4,1.4.10.5 _compat21.py,1.4.2.1,1.4.2.2 _compat22.py,1.4.2.1,1.4.2.2 base64MIME.py,1.5.2.1,1.5.2.2 quopriMIME.py,1.4.2.1,1.4.2.2
bwarsaw@users.sourceforge.net
bwarsaw@users.sourceforge.net
Fri, 21 Mar 2003 13:09:34 -0800
Update of /cvsroot/python/python/dist/src/Lib/email
In directory sc8-pr-cvs1:/tmp/cvs-serv27730/Lib/email
Modified Files:
Tag: release22-maint
Charset.py Generator.py Header.py MIMEText.py Message.py
Parser.py Utils.py __init__.py _compat21.py _compat22.py
base64MIME.py quopriMIME.py
Added Files:
Tag: release22-maint
_parseaddr.py
Log Message:
Backporting email 2.5 to Python 2.2 maint branch.
--- NEW FILE: _parseaddr.py ---
# Copyright (C) 2002 Python Software Foundation
"""Email address parsing code.
Lifted directly from rfc822.py. This should eventually be rewritten.
"""
import calendar
import time

from types import TupleType
# Compatibility shim: True/False are not builtins on the oldest interpreters
# this backport targets (the log message says Python 2.2 maint), so define
# them as plain ints when missing.
try:
    True, False
except NameError:
    True = 1
    False = 0

# Handy string constants used by the joiners below.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
# Month names, lowercase.  Abbreviations occupy indices 0-11, full names
# indices 12-23; parsedate_tz() relies on this layout (index % 12 maps a
# full name back to its month number).
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
# Values are signed HHMM offsets (e.g. -500 means UTC-05:00); they are
# converted to seconds by parsedate_tz().
_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }
def parsedate_tz(data):
    """Convert a date string to a time tuple.

    Accounts for military timezones.

    Returns a 10-tuple (year, month, day, hour, minute, second, weekday,
    yearday, dst, tz-offset-in-seconds) suitable for mktime_tz(), or None
    if the string cannot be parsed.  The weekday, yearday and dst slots
    are always 0; the offset is None when no usable zone was found.
    """
    data = data.split()
    # Guard against empty or all-whitespace input, which previously raised
    # IndexError on the data[0] accesses below.
    if not data:
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        # RFC 2822 makes the day-of-week optional, so don't reject dates
        # that simply lack one (this used to `return None').  If the day
        # name is fused to the next token (e.g. "Fri,21"), strip through
        # the last comma.
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3:  # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        s = data[3]
        i = s.find('+')
        if i > 0:
            # Time and zone are fused, e.g. "13:09:34+0100"; split them.
            data[3:] = [s[:i], s[i+1:]]
        else:
            data.append('')  # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Maybe day and month are swapped (European order); try the other way.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy indices 12-23 of _monthnames.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time appear to be swapped; swap them back.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        # Year and timezone appear to be swapped; swap them back.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        # tzoffset is in signed HHMM form; use floor division so the
        # arithmetic is correct under both classic and true division.
        tzoffset = tzsign * ((tzoffset // 100) * 3600 + (tzoffset % 100) * 60)
    # Don't shadow the builtin `tuple' for the result.
    return (yy, mm, dd, thh, tmm, tss, 0, 0, 0, tzoffset)
def parsedate(data):
    """Convert a time string to a time tuple."""
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        # parsedate_tz() could not parse the string; pass its result
        # (None) straight through.
        return parsed
    # Drop the trailing timezone-offset element, leaving a standard
    # 9-element time tuple.
    return parsed[:9]
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    If the tuple's timezone-offset slot (data[9]) is None, the time is
    interpreted as local time.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(data[:8] + (-1,))
    else:
        # Interpret the broken-down time as UTC with calendar.timegm() and
        # then apply the explicit offset.  The previous implementation used
        # time.mktime(...) - data[9] - time.timezone, which wrongly applied
        # the *local* zone's DST rules to a non-local time.
        t = calendar.timegm(data)
        return t - data[9]
def quote(str):
    """Backslash-escape backslashes and double quotes in a string.

    (Despite the name, no surrounding quote characters are added.)
    """
    escaped = []
    for ch in str:
        if ch in '\\"':
            # Prefix both backslash and double-quote with a backslash.
            escaped.append('\\')
        escaped.append(ch)
    return ''.join(escaped)
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a simple recursive-descent scanner over self.field; the
    current position is kept in self.pos and RFC 2822 comments encountered
    along the way are accumulated in self.commentlist.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        # Characters that terminate an atom (see RFC 2822 `specials').
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address."""
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                # Skip folding whitespace and line breaks.
                self.pos += 1
            elif self.field[self.pos] == '(':
                # Collect (rather than discard) comments; getaddress() may
                # use them as a realname fallback.
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses.
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                # Unparseable chunk: record an empty placeholder pair so the
                # caller can tell something was present.
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, email-address) pairs (usually one;
        several for a group address).
        """
        self.commentlist = []
        self.gotonext()

        # Remember where we started so the addr-spec branch below can rewind
        # and re-parse from scratch.
        oldpos = self.pos
        oldcl = self.commentlist
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group (phrase ":" addresses ";")
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                # Fold any comments into the realname in parentheses.
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                # Bare phrase with no angle-addr; use comments as realname.
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character; skip it so the caller's loop
                # makes progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            # Consume the separator between addresses.
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # We just saw `@'; consume (and discard) the route domain.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                # End of the route portion; the addr-spec follows.
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec (local-part "@" domain)."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                # Quoted local-part word; keep the surrounding quotes.
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # No domain part; return just the local part.
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return EMPTYSTRING.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                # Domain literal, e.g. [1.2.3.4].
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        The delimiters themselves are consumed but not included in the
        returned string.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        quote = False  # True immediately after a backslash escape
        self.pos += 1
        while self.pos < len(self.field):
            if quote:
                # Previous character was a backslash; take this one verbatim.
                slist.append(self.field[self.pos])
                quote = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                # NOTE(review): getcomment() already advances self.pos past
                # the closing paren, and the unconditional increment below
                # then skips one extra character -- looks like a latent bug
                # (later versions add a `continue' here); preserved as-is.
                slist.append(self.getcomment())
            elif self.field[self.pos] == '\\':
                quote = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1
        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        # Stop at an unmatched CR as well as the closing quote.
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        # Nested comments are allowed (allowcomments=True).
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses."""

    def __init__(self, field):
        """Parse `field' into self.addresslist.

        addresslist is a list of (realname, email-address) pairs; it is
        empty when `field' is None or empty.
        """
        AddrlistClass.__init__(self, field)
        if field:
            self.addresslist = self.getaddrlist()
        else:
            self.addresslist = []

    def __len__(self):
        return len(self.addresslist)

    def _dump_pair(self, pair):
        """Render one (realname, address) pair as `"realname" <addr>'."""
        if pair[0]:
            return '"%s" <%s>' % (pair[0], pair[1])
        return pair[1]

    def __str__(self):
        # Bug fix: this used to call dump_address_pair(), a leftover from
        # rfc822.py that is not defined anywhere in this module, so str()
        # always raised NameError.  Format the pairs the same way
        # rfc822.dump_address_pair() did.
        return COMMASPACE.join([self._dump_pair(pair)
                                for pair in self.addresslist])

    def __add__(self, other):
        # Set union
        newaddr = AddressList(None)
        newaddr.addresslist = self.addresslist[:]
        for x in other.addresslist:
            if not x in self.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __iadd__(self, other):
        # Set union, in-place
        for x in other.addresslist:
            if not x in self.addresslist:
                self.addresslist.append(x)
        return self

    def __sub__(self, other):
        # Set difference
        newaddr = AddressList(None)
        for x in self.addresslist:
            if not x in other.addresslist:
                newaddr.addresslist.append(x)
        return newaddr

    def __isub__(self, other):
        # Set difference, in-place
        for x in other.addresslist:
            if x in self.addresslist:
                self.addresslist.remove(x)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
Index: Charset.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Charset.py,v
retrieving revision 1.7.2.3
retrieving revision 1.7.2.4
diff -C2 -d -r1.7.2.3 -r1.7.2.4
*** Charset.py 14 Oct 2002 17:26:00 -0000 1.7.2.3
--- Charset.py 21 Mar 2003 21:09:31 -0000 1.7.2.4
***************
*** 36,39 ****
--- 36,53 ----
'iso-8859-1': (QP, QP, None),
'iso-8859-2': (QP, QP, None),
+ 'iso-8859-3': (QP, QP, None),
+ 'iso-8859-4': (QP, QP, None),
+ # iso-8859-5 is Cyrillic, and not especially used
+ # iso-8859-6 is Arabic, also not particularly used
+ # iso-8859-7 is Greek, QP will not make it readable
+ # iso-8859-8 is Hebrew, QP will not make it readable
+ 'iso-8859-9': (QP, QP, None),
+ 'iso-8859-10': (QP, QP, None),
+ # iso-8859-11 is Thai, QP will not make it readable
+ 'iso-8859-13': (QP, QP, None),
+ 'iso-8859-14': (QP, QP, None),
+ 'iso-8859-15': (QP, QP, None),
+ 'windows-1252':(QP, QP, None),
+ 'viscii': (QP, QP, None),
'us-ascii': (None, None, None),
'big5': (BASE64, BASE64, None),
***************
*** 53,56 ****
--- 67,89 ----
'latin_1': 'iso-8859-1',
'latin-1': 'iso-8859-1',
+ 'latin_2': 'iso-8859-2',
+ 'latin-2': 'iso-8859-2',
+ 'latin_3': 'iso-8859-3',
+ 'latin-3': 'iso-8859-3',
+ 'latin_4': 'iso-8859-4',
+ 'latin-4': 'iso-8859-4',
+ 'latin_5': 'iso-8859-9',
+ 'latin-5': 'iso-8859-9',
+ 'latin_6': 'iso-8859-10',
+ 'latin-6': 'iso-8859-10',
+ 'latin_7': 'iso-8859-13',
+ 'latin-7': 'iso-8859-13',
+ 'latin_8': 'iso-8859-14',
+ 'latin-8': 'iso-8859-14',
+ 'latin_9': 'iso-8859-15',
+ 'latin-9': 'iso-8859-15',
+ 'cp949': 'ks_c_5601-1987',
+ 'euc_jp': 'euc-jp',
+ 'euc_kr': 'euc-kr',
'ascii': 'us-ascii',
}
***************
*** 70,73 ****
--- 103,110 ----
'iso-2022-jp': 'japanese.iso-2022-jp',
'shift_jis': 'japanese.shift_jis',
+ 'euc-kr': 'korean.euc-kr',
+ 'ks_c_5601-1987': 'korean.cp949',
+ 'iso-2022-kr': 'korean.iso-2022-kr',
+ 'johab': 'korean.johab',
'gb2132': 'eucgb2312_cn',
'big5': 'big5_tw',
***************
*** 198,201 ****
--- 235,240 ----
return self.input_charset.lower()
+ __repr__ = __str__
+
def __eq__(self, other):
return str(self) == str(other).lower()
***************
*** 322,326 ****
return email.base64MIME.header_encode(s, cset)
elif self.header_encoding == QP:
! return email.quopriMIME.header_encode(s, cset)
elif self.header_encoding == SHORTEST:
lenb64 = email.base64MIME.base64_len(s)
--- 361,365 ----
return email.base64MIME.header_encode(s, cset)
elif self.header_encoding == QP:
! return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
elif self.header_encoding == SHORTEST:
lenb64 = email.base64MIME.base64_len(s)
***************
*** 329,333 ****
return email.base64MIME.header_encode(s, cset)
else:
! return email.quopriMIME.header_encode(s, cset)
else:
return s
--- 368,372 ----
return email.base64MIME.header_encode(s, cset)
else:
! return email.quopriMIME.header_encode(s, cset, maxlinelen=None)
else:
return s
***************
*** 349,353 ****
if self.body_encoding is BASE64:
return email.base64MIME.body_encode(s)
! elif self.header_encoding is QP:
return email.quopriMIME.body_encode(s)
else:
--- 388,392 ----
if self.body_encoding is BASE64:
return email.base64MIME.body_encode(s)
! elif self.body_encoding is QP:
return email.quopriMIME.body_encode(s)
else:
Index: Generator.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Generator.py,v
retrieving revision 1.6.10.3
retrieving revision 1.6.10.4
diff -C2 -d -r1.6.10.3 -r1.6.10.4
*** Generator.py 14 Oct 2002 17:26:01 -0000 1.6.10.3
--- Generator.py 21 Mar 2003 21:09:31 -0000 1.6.10.4
***************
*** 5,10 ****
"""
- import time
import re
import random
--- 5,11 ----
"""
import re
+ import time
+ import locale
import random
***************
*** 13,16 ****
--- 14,18 ----
from email.Header import Header
+ from email.Parser import NLCRE
try:
***************
*** 160,201 ****
def _write_headers(self, msg):
for h, v in msg.items():
! # RFC 2822 says that lines SHOULD be no more than maxheaderlen
! # characters wide, so we're well within our rights to split long
! # headers.
! text = '%s: %s' % (h, v)
! if self.__maxheaderlen > 0 and len(text) > self.__maxheaderlen:
! text = self._split_header(text)
! print >> self._fp, text
# A blank line always separates headers from body
print >> self._fp
- def _split_header(self, text):
- maxheaderlen = self.__maxheaderlen
- # Find out whether any lines in the header are really longer than
- # maxheaderlen characters wide. There could be continuation lines
- # that actually shorten it. Also, replace hard tabs with 8 spaces.
- lines = [s.replace('\t', SPACE8) for s in text.splitlines()]
- for line in lines:
- if len(line) > maxheaderlen:
- break
- else:
- # No line was actually longer than maxheaderlen characters, so
- # just return the original unchanged.
- return text
- # If we have raw 8bit data in a byte string, we have no idea what the
- # encoding is. I think there is no safe way to split this string. If
- # it's ascii-subset, then we could do a normal ascii split, but if
- # it's multibyte then we could break the string. There's no way to
- # know so the least harm seems to be to not split the string and risk
- # it being too long.
- if _is8bitstring(text):
- return text
- # The `text' argument already has the field name prepended, so don't
- # provide it here or the first line will get folded too short.
- h = Header(text, maxlinelen=maxheaderlen,
- # For backwards compatibility, we use a hard tab here
- continuation_ws='\t')
- return h.encode()
-
#
# Handlers for writing types and subtypes
--- 162,188 ----
def _write_headers(self, msg):
for h, v in msg.items():
! print >> self._fp, '%s:' % h,
! if self.__maxheaderlen == 0:
! # Explicit no-wrapping
! print >> self._fp, v
! elif isinstance(v, Header):
! # Header instances know what to do
! print >> self._fp, v.encode()
! elif _is8bitstring(v):
! # If we have raw 8bit data in a byte string, we have no idea
! # what the encoding is. There is no safe way to split this
! # string. If it's ascii-subset, then we could do a normal
! # ascii split, but if it's multibyte then we could break the
! # string. There's no way to know so the least harm seems to
! # be to not split the string and risk it being too long.
! print >> self._fp, v
! else:
! # Header's got lots of smarts, so use it.
! print >> self._fp, Header(
! v, maxlinelen=self.__maxheaderlen,
! header_name=h, continuation_ws='\t').encode()
# A blank line always separates headers from body
print >> self._fp
#
# Handlers for writing types and subtypes
***************
*** 259,262 ****
--- 246,257 ----
if msg.preamble is not None:
self._fp.write(msg.preamble)
+ # If preamble is the empty string, the length of the split will be
+ # 1, but the last element will be the empty string. If it's
+ # anything else but does not end in a line separator, the length
+ # will be > 1 and not end in an empty string. We need to
+ # guarantee a newline after the preamble, but don't add too many.
+ plines = NLCRE.split(msg.preamble)
+ if plines <> [''] and plines[-1] <> '':
+ self._fp.write('\n')
# First boundary is a bit different; it doesn't have a leading extra
# newline.
***************
*** 365,369 ****
# Craft a random boundary. If text is given, ensure that the chosen
# boundary doesn't appear in the text.
! boundary = ('=' * 15) + repr(random.random()).split('.')[1] + '=='
if text is None:
return boundary
--- 360,365 ----
# Craft a random boundary. If text is given, ensure that the chosen
# boundary doesn't appear in the text.
! dp = locale.localeconv().get('decimal_point', '.')
! boundary = ('=' * 15) + repr(random.random()).split(dp)[1] + '=='
if text is None:
return boundary
Index: Header.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Header.py,v
retrieving revision 1.13.2.2
retrieving revision 1.13.2.3
diff -C2 -d -r1.13.2.2 -r1.13.2.3
*** Header.py 14 Oct 2002 17:26:02 -0000 1.13.2.2
--- Header.py 21 Mar 2003 21:09:31 -0000 1.13.2.3
***************
*** 5,12 ****
--- 5,14 ----
import re
+ import binascii
from types import StringType, UnicodeType
import email.quopriMIME
import email.base64MIME
+ from email.Errors import HeaderParseError
from email.Charset import Charset
***************
*** 26,31 ****
--- 28,36 ----
CRLF = '\r\n'
NL = '\n'
+ SPACE = ' '
+ USPACE = u' '
SPACE8 = ' ' * 8
EMPTYSTRING = ''
+ UEMPTYSTRING = u''
MAXLINELEN = 76
***************
*** 48,51 ****
--- 53,63 ----
''', re.VERBOSE | re.IGNORECASE)
+ pcre = re.compile('([,;])')
+
+ # Field name regexp, including trailing colon, but not separating whitespace,
+ # according to RFC 2822. Character range is from tilde to exclamation mark.
+ # For use with .match()
+ fcre = re.compile(r'[\041-\176]+:$')
+
***************
*** 62,65 ****
--- 74,80 ----
header, otherwise a lower-case string containing the name of the character
set specified in the encoded string.
+
+ An email.Errors.HeaderParseError may be raised when certain decoding error
+ occurs (e.g. a base64 decoding exception).
"""
# If no encoding, just return the header
***************
*** 80,84 ****
# Should we continue a long line?
if decoded and decoded[-1][1] is None:
! decoded[-1] = (decoded[-1][0] + dec, None)
else:
decoded.append((unenc, None))
--- 95,99 ----
# Should we continue a long line?
if decoded and decoded[-1][1] is None:
! decoded[-1] = (decoded[-1][0] + SPACE + unenc, None)
else:
decoded.append((unenc, None))
***************
*** 86,95 ****
charset, encoding = [s.lower() for s in parts[0:2]]
encoded = parts[2]
! dec = ''
if encoding == 'q':
dec = email.quopriMIME.header_decode(encoded)
elif encoding == 'b':
! dec = email.base64MIME.decode(encoded)
! else:
dec = encoded
--- 101,116 ----
charset, encoding = [s.lower() for s in parts[0:2]]
encoded = parts[2]
! dec = None
if encoding == 'q':
dec = email.quopriMIME.header_decode(encoded)
elif encoding == 'b':
! try:
! dec = email.base64MIME.decode(encoded)
! except binascii.Error:
! # Turn this into a higher level exception. BAW: Right
! # now we throw the lower level exception away but
! # when/if we get exception chaining, we'll preserve it.
! raise HeaderParseError
! if dec is None:
dec = encoded
***************
*** 127,132 ****
class Header:
! def __init__(self, s=None, charset=None, maxlinelen=None, header_name=None,
! continuation_ws=' '):
"""Create a MIME-compliant header that can contain many character sets.
--- 148,154 ----
class Header:
! def __init__(self, s=None, charset=None,
! maxlinelen=None, header_name=None,
! continuation_ws=' ', errors='strict'):
"""Create a MIME-compliant header that can contain many character sets.
***************
*** 151,154 ****
--- 173,178 ----
either a space or a hard tab) which will be prepended to continuation
lines.
+
+ errors is passed through to the .append() call.
"""
if charset is None:
***************
*** 162,166 ****
self._chunks = []
if s is not None:
! self.append(s, charset)
if maxlinelen is None:
maxlinelen = MAXLINELEN
--- 186,190 ----
self._chunks = []
if s is not None:
! self.append(s, charset, errors)
if maxlinelen is None:
maxlinelen = MAXLINELEN
***************
*** 183,189 ****
def __unicode__(self):
"""Helper for the built-in unicode function."""
! # charset item is a Charset instance so we need to stringify it.
! uchunks = [unicode(s, str(charset)) for s, charset in self._chunks]
! return u''.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
--- 207,228 ----
def __unicode__(self):
"""Helper for the built-in unicode function."""
! uchunks = []
! lastcs = None
! for s, charset in self._chunks:
! # We must preserve spaces between encoded and non-encoded word
! # boundaries, which means for us we need to add a space when we go
! # from a charset to None/us-ascii, or from None/us-ascii to a
! # charset. Only do this for the second and subsequent chunks.
! nextcs = charset
! if uchunks:
! if lastcs is not None:
! if nextcs is None or nextcs == 'us-ascii':
! uchunks.append(USPACE)
! nextcs = None
! elif nextcs is not None and nextcs <> 'us-ascii':
! uchunks.append(USPACE)
! lastcs = nextcs
! uchunks.append(unicode(s, str(charset)))
! return UEMPTYSTRING.join(uchunks)
# Rich comparison operators for equality only. BAW: does it make sense to
***************
*** 197,201 ****
return not self == other
! def append(self, s, charset=None):
"""Append a string to the MIME header.
--- 236,240 ----
return not self == other
! def append(self, s, charset=None, errors='strict'):
"""Append a string to the MIME header.
***************
*** 214,217 ****
--- 253,259 ----
following charsets in order: us-ascii, the charset hint, utf-8. The
first character set not to provoke a UnicodeError is used.
+
+ Optional `errors' is passed as the third argument to any unicode() or
+ ustr.encode() call.
"""
if charset is None:
***************
*** 228,237 ****
# converted to a unicode with the input codec of the charset.
incodec = charset.input_codec or 'us-ascii'
! ustr = unicode(s, incodec)
# Now make sure that the unicode could be converted back to a
# byte string with the output codec, which may be different
# than the iput coded. Still, use the original byte string.
outcodec = charset.output_codec or 'us-ascii'
! ustr.encode(outcodec)
elif isinstance(s, UnicodeType):
# Now we have to be sure the unicode string can be converted
--- 270,279 ----
# converted to a unicode with the input codec of the charset.
incodec = charset.input_codec or 'us-ascii'
! ustr = unicode(s, incodec, errors)
# Now make sure that the unicode could be converted back to a
# byte string with the output codec, which may be different
# than the iput coded. Still, use the original byte string.
outcodec = charset.output_codec or 'us-ascii'
! ustr.encode(outcodec, errors)
elif isinstance(s, UnicodeType):
# Now we have to be sure the unicode string can be converted
***************
*** 241,245 ****
try:
outcodec = charset.output_codec or 'us-ascii'
! s = s.encode(outcodec)
break
except UnicodeError:
--- 283,287 ----
try:
outcodec = charset.output_codec or 'us-ascii'
! s = s.encode(outcodec, errors)
break
except UnicodeError:
***************
*** 249,259 ****
self._chunks.append((s, charset))
! def _split(self, s, charset, firstline=False):
# Split up a header safely for use with encode_chunks.
splittable = charset.to_splittable(s)
! encoded = charset.from_splittable(splittable)
elen = charset.encoded_header_len(encoded)
!
! if elen <= self._maxlinelen:
return [(encoded, charset)]
# If we have undetermined raw 8bit characters sitting in a byte
--- 291,301 ----
self._chunks.append((s, charset))
! def _split(self, s, charset, maxlinelen, splitchars):
# Split up a header safely for use with encode_chunks.
splittable = charset.to_splittable(s)
! encoded = charset.from_splittable(splittable, True)
elen = charset.encoded_header_len(encoded)
! # If the line's encoded length first, just return it
! if elen <= maxlinelen:
return [(encoded, charset)]
# If we have undetermined raw 8bit characters sitting in a byte
***************
*** 263,267 ****
# be to not split the header at all, but that means they could go out
# longer than maxlinelen.
! elif charset == '8bit':
return [(s, charset)]
# BAW: I'm not sure what the right test here is. What we're trying to
--- 305,309 ----
# be to not split the header at all, but that means they could go out
# longer than maxlinelen.
! if charset == '8bit':
return [(s, charset)]
# BAW: I'm not sure what the right test here is. What we're trying to
***************
*** 276,374 ****
# although it's possible that other charsets may also benefit from the
# higher-level syntactic breaks.
- #
elif charset == 'us-ascii':
! return self._ascii_split(s, charset, firstline)
# BAW: should we use encoded?
elif elen == len(s):
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
! splitpnt = self._maxlinelen
first = charset.from_splittable(splittable[:splitpnt], False)
last = charset.from_splittable(splittable[splitpnt:], False)
else:
! # Divide and conquer.
! halfway = _floordiv(len(splittable), 2)
! first = charset.from_splittable(splittable[:halfway], False)
! last = charset.from_splittable(splittable[halfway:], False)
! # Do the split
! return self._split(first, charset, firstline) + \
! self._split(last, charset)
! def _ascii_split(self, s, charset, firstline):
! # Attempt to split the line at the highest-level syntactic break
! # possible. Note that we don't have a lot of smarts about field
! # syntax; we just try to break on semi-colons, then whitespace.
! rtn = []
! lines = s.splitlines()
! while lines:
! line = lines.pop(0)
! if firstline:
! maxlinelen = self._firstlinelen
! firstline = False
! else:
! #line = line.lstrip()
! maxlinelen = self._maxlinelen
! # Short lines can remain unchanged
! if len(line.replace('\t', SPACE8)) <= maxlinelen:
! rtn.append(line)
! else:
! oldlen = len(line)
! # Try to break the line on semicolons, but if that doesn't
! # work, try to split on folding whitespace.
! while len(line) > maxlinelen:
! i = line.rfind(';', 0, maxlinelen)
! if i < 0:
! break
! rtn.append(line[:i] + ';')
! line = line[i+1:]
! # Is the remaining stuff still longer than maxlinelen?
! if len(line) <= maxlinelen:
! # Splitting on semis worked
! rtn.append(line)
! continue
! # Splitting on semis didn't finish the job. If it did any
! # work at all, stick the remaining junk on the front of the
! # `lines' sequence and let the next pass do its thing.
! if len(line) <> oldlen:
! lines.insert(0, line)
! continue
! # Otherwise, splitting on semis didn't help at all.
! parts = re.split(r'(\s+)', line)
! if len(parts) == 1 or (len(parts) == 3 and
! parts[0].endswith(':')):
! # This line can't be split on whitespace. There's now
! # little we can do to get this into maxlinelen. BAW:
! # We're still potentially breaking the RFC by possibly
! # allowing lines longer than the absolute maximum of 998
! # characters. For now, let it slide.
! #
! # len(parts) will be 1 if this line has no `Field: '
! # prefix, otherwise it will be len(3).
! rtn.append(line)
! continue
! # There is whitespace we can split on.
! first = parts.pop(0)
! sublines = [first]
! acc = len(first)
! while parts:
! len0 = len(parts[0])
! len1 = len(parts[1])
! if acc + len0 + len1 <= maxlinelen:
! sublines.append(parts.pop(0))
! sublines.append(parts.pop(0))
! acc += len0 + len1
! else:
! # Split it here, but don't forget to ignore the
! # next whitespace-only part
! if first <> '':
! rtn.append(EMPTYSTRING.join(sublines))
! del parts[0]
! first = parts.pop(0)
! sublines = [first]
! acc = len(first)
! rtn.append(EMPTYSTRING.join(sublines))
! return [(chunk, charset) for chunk in rtn]
! def _encode_chunks(self, newchunks):
# MIME-encode a header with many different charsets and/or encodings.
#
--- 318,346 ----
# although it's possible that other charsets may also benefit from the
# higher-level syntactic breaks.
elif charset == 'us-ascii':
! return self._split_ascii(s, charset, maxlinelen, splitchars)
# BAW: should we use encoded?
elif elen == len(s):
# We can split on _maxlinelen boundaries because we know that the
# encoding won't change the size of the string
! splitpnt = maxlinelen
first = charset.from_splittable(splittable[:splitpnt], False)
last = charset.from_splittable(splittable[splitpnt:], False)
else:
! # Binary search for split point
! first, last = _binsplit(splittable, charset, maxlinelen)
! # first is of the proper length so just wrap it in the appropriate
! # chrome. last must be recursively split.
! fsplittable = charset.to_splittable(first)
! fencoded = charset.from_splittable(fsplittable, True)
! chunk = [(fencoded, charset)]
! return chunk + self._split(last, charset, self._maxlinelen, splitchars)
! def _split_ascii(self, s, charset, firstlen, splitchars):
! chunks = _split_ascii(s, firstlen, self._maxlinelen,
! self._continuation_ws, splitchars)
! return zip(chunks, [charset]*len(chunks))
! def _encode_chunks(self, newchunks, maxlinelen):
# MIME-encode a header with many different charsets and/or encodings.
#
***************
*** 388,404 ****
# =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
# =?charset2?b?SvxyZ2VuIEL2aW5n?="
- #
chunks = []
for header, charset in newchunks:
if charset is None or charset.header_encoding is None:
! # There's no encoding for this chunk's charsets
! _max_append(chunks, header, self._maxlinelen)
else:
! _max_append(chunks, charset.header_encode(header),
! self._maxlinelen, ' ')
joiner = NL + self._continuation_ws
return joiner.join(chunks)
! def encode(self):
"""Encode a message header into an RFC-compliant format.
--- 360,381 ----
# =?charset1?q?Mar=EDa_Gonz=E1lez_Alonso?=\n
# =?charset2?b?SvxyZ2VuIEL2aW5n?="
chunks = []
for header, charset in newchunks:
+ if not header:
+ continue
if charset is None or charset.header_encoding is None:
! s = header
else:
! s = charset.header_encode(header)
! # Don't add more folding whitespace than necessary
! if chunks and chunks[-1].endswith(' '):
! extra = ''
! else:
! extra = ' '
! _max_append(chunks, s, maxlinelen, extra)
joiner = NL + self._continuation_ws
return joiner.join(chunks)
! def encode(self, splitchars=';, '):
"""Encode a message header into an RFC-compliant format.
***************
*** 417,423 ****
If the given charset is not known or an error occurs during
conversion, this function will return the header untouched.
"""
newchunks = []
for s, charset in self._chunks:
! newchunks += self._split(s, charset, True)
! return self._encode_chunks(newchunks)
--- 394,515 ----
If the given charset is not known or an error occurs during
conversion, this function will return the header untouched.
+
+ Optional splitchars is a string containing characters to split long
+ ASCII lines on, in rough support of RFC 2822's `highest level
+ syntactic breaks'. This doesn't affect RFC 2047 encoded lines.
"""
newchunks = []
+ maxlinelen = self._firstlinelen
+ lastlen = 0
for s, charset in self._chunks:
! # The first bit of the next chunk should be just long enough to
! # fill the next line. Don't forget the space separating the
! # encoded words.
! targetlen = maxlinelen - lastlen - 1
! if targetlen < charset.encoded_header_len(''):
! # Stick it on the next line
! targetlen = maxlinelen
! newchunks += self._split(s, charset, targetlen, splitchars)
! lastchunk, lastcharset = newchunks[-1]
! lastlen = lastcharset.encoded_header_len(lastchunk)
! return self._encode_chunks(newchunks, maxlinelen)
!
!
!
! def _split_ascii(s, firstlen, restlen, continuation_ws, splitchars):
! lines = []
! maxlen = firstlen
! for line in s.splitlines():
! # Ignore any leading whitespace (i.e. continuation whitespace) already
! # on the line, since we'll be adding our own.
! line = line.lstrip()
! if len(line) < maxlen:
! lines.append(line)
! maxlen = restlen
! continue
! # Attempt to split the line at the highest-level syntactic break
! # possible. Note that we don't have a lot of smarts about field
! # syntax; we just try to break on semi-colons, then commas, then
! # whitespace.
! for ch in splitchars:
! if line.find(ch) >= 0:
! break
! else:
! # There's nothing useful to split the line on, not even spaces, so
! # just append this line unchanged
! lines.append(line)
! maxlen = restlen
! continue
! # Now split the line on the character plus trailing whitespace
! cre = re.compile(r'%s\s*' % ch)
! if ch in ';,':
! eol = ch
! else:
! eol = ''
! joiner = eol + ' '
! joinlen = len(joiner)
! wslen = len(continuation_ws.replace('\t', SPACE8))
! this = []
! linelen = 0
! for part in cre.split(line):
! curlen = linelen + max(0, len(this)-1) * joinlen
! partlen = len(part)
! onfirstline = not lines
! # We don't want to split after the field name, if we're on the
! # first line and the field name is present in the header string.
! if ch == ' ' and onfirstline and \
! len(this) == 1 and fcre.match(this[0]):
! this.append(part)
! linelen += partlen
! elif curlen + partlen > maxlen:
! if this:
! lines.append(joiner.join(this) + eol)
! # If this part is longer than maxlen and we aren't already
! # splitting on whitespace, try to recursively split this line
! # on whitespace.
! if partlen > maxlen and ch <> ' ':
! subl = _split_ascii(part, maxlen, restlen,
! continuation_ws, ' ')
! lines.extend(subl[:-1])
! this = [subl[-1]]
! else:
! this = [part]
! linelen = wslen + len(this[-1])
! maxlen = restlen
! else:
! this.append(part)
! linelen += partlen
! # Put any left over parts on a line by themselves
! if this:
! lines.append(joiner.join(this))
! return lines
!
!
!
! def _binsplit(splittable, charset, maxlinelen):
! i = 0
! j = len(splittable)
! while i < j:
! # Invariants:
! # 1. splittable[:k] fits for all k <= i (note that we *assume*,
! # at the start, that splittable[:0] fits).
! # 2. splittable[:k] does not fit for any k > j (at the start,
! # this means we shouldn't look at any k > len(splittable)).
! # 3. We don't know about splittable[:k] for k in i+1..j.
! # 4. We want to set i to the largest k that fits, with i <= k <= j.
! #
! m = (i+j+1) >> 1 # ceiling((i+j)/2); i < m <= j
! chunk = charset.from_splittable(splittable[:m], True)
! chunklen = charset.encoded_header_len(chunk)
! if chunklen <= maxlinelen:
! # m is acceptable, so is a new lower bound.
! i = m
! else:
! # m is not acceptable, so final i must be < m.
! j = m - 1
! # i == j. Invariant #1 implies that splittable[:i] fits, and
! # invariant #2 implies that splittable[:i+1] does not fit, so i
! # is what we're looking for.
! first = charset.from_splittable(splittable[:i], False)
! last = charset.from_splittable(splittable[i:], False)
! return first, last
Index: MIMEText.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/MIMEText.py,v
retrieving revision 1.3.10.1
retrieving revision 1.3.10.2
diff -C2 -d -r1.3.10.1 -r1.3.10.2
*** MIMEText.py 4 Oct 2002 17:24:24 -0000 1.3.10.1
--- MIMEText.py 21 Mar 2003 21:09:31 -0000 1.3.10.2
***************
*** 18,23 ****
"""Create a text/* type MIME document.
! _text is the string for this message object. If the text does not end
! in a newline, one is added.
_subtype is the MIME sub content type, defaulting to "plain".
--- 18,22 ----
"""Create a text/* type MIME document.
! _text is the string for this message object.
_subtype is the MIME sub content type, defaulting to "plain".
***************
*** 36,41 ****
MIMENonMultipart.__init__(self, 'text', _subtype,
**{'charset': _charset})
- if _text and not _text.endswith('\n'):
- _text += '\n'
self.set_payload(_text, _charset)
if _encoder is not None:
--- 35,38 ----
Index: Message.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Message.py,v
retrieving revision 1.9.6.2
retrieving revision 1.9.6.3
diff -C2 -d -r1.9.6.2 -r1.9.6.3
*** Message.py 10 Oct 2002 19:09:24 -0000 1.9.6.2
--- Message.py 21 Mar 2003 21:09:31 -0000 1.9.6.3
***************
*** 6,9 ****
--- 6,11 ----
import re
+ import uu
+ import binascii
import warnings
from cStringIO import StringIO
***************
*** 11,16 ****
# Intrapackage imports
- from email import Errors
from email import Utils
from email import Charset
--- 13,18 ----
# Intrapackage imports
from email import Utils
+ from email import Errors
from email import Charset
***************
*** 165,176 ****
i returns that index into the payload.
! Optional decode is a flag (defaulting to False) indicating whether the
! payload should be decoded or not, according to the
! Content-Transfer-Encoding header. When True and the message is not a
! multipart, the payload will be decoded if this header's value is
! `quoted-printable' or `base64'. If some other encoding is used, or
! the header is missing, the payload is returned as-is (undecoded). If
! the message is a multipart and the decode flag is True, then None is
! returned.
"""
if i is None:
--- 167,182 ----
i returns that index into the payload.
! Optional decode is a flag indicating whether the payload should be
! decoded or not, according to the Content-Transfer-Encoding header
! (default is False).
!
! When True and the message is not a multipart, the payload will be
! decoded if this header's value is `quoted-printable' or `base64'. If
! some other encoding is used, or the header is missing, or if the
! payload has bogus data (i.e. bogus base64 or uuencoded data), the
! payload is returned as-is.
!
! If the message is a multipart and the decode flag is True, then None
! is returned.
"""
if i is None:
***************
*** 183,191 ****
if self.is_multipart():
return None
! cte = self.get('content-transfer-encoding', '')
! if cte.lower() == 'quoted-printable':
return Utils._qdecode(payload)
! elif cte.lower() == 'base64':
! return Utils._bdecode(payload)
# Everything else, including encodings with 8bit or 7bit are returned
# unchanged.
--- 189,209 ----
if self.is_multipart():
return None
! cte = self.get('content-transfer-encoding', '').lower()
! if cte == 'quoted-printable':
return Utils._qdecode(payload)
! elif cte == 'base64':
! try:
! return Utils._bdecode(payload)
! except binascii.Error:
! # Incorrect padding
! return payload
! elif cte in ('x-uuencode', 'uuencode', 'uue', 'x-uue'):
! sfp = StringIO()
! try:
! uu.decode(StringIO(payload+'\n'), sfp)
! payload = sfp.getvalue()
! except uu.Error:
! # Some decoding problem
! return payload
# Everything else, including encodings with 8bit or 7bit are returned
# unchanged.
Index: Parser.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Parser.py,v
retrieving revision 1.5.10.3
retrieving revision 1.5.10.4
diff -C2 -d -r1.5.10.3 -r1.5.10.4
*** Parser.py 7 Oct 2002 17:02:40 -0000 1.5.10.3
--- Parser.py 21 Mar 2003 21:09:31 -0000 1.5.10.4
***************
*** 21,25 ****
False = 0
! nlcre = re.compile('\r\n|\r|\n')
--- 21,25 ----
False = 0
! NLCRE = re.compile('\r\n|\r|\n')
***************
*** 60,66 ****
"""
root = self._class()
! self._parseheaders(root, fp)
if not headersonly:
! self._parsebody(root, fp)
return root
--- 60,66 ----
"""
root = self._class()
! firstbodyline = self._parseheaders(root, fp)
if not headersonly:
! self._parsebody(root, fp, firstbodyline)
return root
***************
*** 81,84 ****
--- 81,85 ----
lastvalue = []
lineno = 0
+ firstbodyline = None
while True:
# Don't strip the line before we test for the end condition,
***************
*** 121,131 ****
if self._strict:
raise Errors.HeaderParseError(
! "Not a header, not a continuation: ``%s''"%line)
elif lineno == 1 and line.startswith('--'):
# allow through duplicate boundary tags.
continue
else:
! raise Errors.HeaderParseError(
! "Not a header, not a continuation: ``%s''"%line)
if lastheader:
container[lastheader] = NL.join(lastvalue)
--- 122,135 ----
if self._strict:
raise Errors.HeaderParseError(
! "Not a header, not a continuation: ``%s''" % line)
elif lineno == 1 and line.startswith('--'):
# allow through duplicate boundary tags.
continue
else:
! # There was no separating blank line as mandated by RFC
! # 2822, but we're in non-strict mode. So just offer up
! # this current line as the first body line.
! firstbodyline = line
! break
if lastheader:
container[lastheader] = NL.join(lastvalue)
***************
*** 135,140 ****
if lastheader:
container[lastheader] = NL.join(lastvalue)
! def _parsebody(self, container, fp):
# Parse the body, but first split the payload on the content-type
# boundary if present.
--- 139,145 ----
if lastheader:
container[lastheader] = NL.join(lastvalue)
+ return firstbodyline
! def _parsebody(self, container, fp, firstbodyline=None):
# Parse the body, but first split the payload on the content-type
# boundary if present.
***************
*** 153,156 ****
--- 158,163 ----
separator = '--' + boundary
payload = fp.read()
+ if firstbodyline is not None:
+ payload = firstbodyline + '\n' + payload
# We use an RE here because boundaries can have trailing
# whitespace.
***************
*** 170,174 ****
# Find out what kind of line endings we're using
start += len(mo.group('sep')) + len(mo.group('ws'))
! mo = nlcre.search(payload, start)
if mo:
start += len(mo.group(0))
--- 177,181 ----
# Find out what kind of line endings we're using
start += len(mo.group('sep')) + len(mo.group('ws'))
! mo = NLCRE.search(payload, start)
if mo:
start += len(mo.group(0))
***************
*** 222,228 ****
msgobj = self.parsestr(parthdrs, headersonly=1)
# while submsgobj is the message itself
- submsgobj = self.parsestr(part)
- msgobj.attach(submsgobj)
msgobj.set_default_type('message/rfc822')
else:
msgobj = self.parsestr(part)
--- 229,239 ----
msgobj = self.parsestr(parthdrs, headersonly=1)
# while submsgobj is the message itself
msgobj.set_default_type('message/rfc822')
+ maintype = msgobj.get_content_maintype()
+ if maintype in ('message', 'multipart'):
+ submsgobj = self.parsestr(part)
+ msgobj.attach(submsgobj)
+ else:
+ msgobj.set_payload(part)
else:
msgobj = self.parsestr(part)
***************
*** 257,261 ****
container.attach(msg)
else:
! container.set_payload(fp.read())
--- 268,275 ----
container.attach(msg)
else:
! text = fp.read()
! if firstbodyline is not None:
! text = firstbodyline + '\n' + text
! container.set_payload(text)
***************
*** 271,275 ****
interested in is the message headers.
"""
! def _parsebody(self, container, fp):
# Consume but do not parse, the body
! container.set_payload(fp.read())
--- 285,292 ----
interested in is the message headers.
"""
! def _parsebody(self, container, fp, firstbodyline=None):
# Consume but do not parse, the body
! text = fp.read()
! if firstbodyline is not None:
! text = firstbodyline + '\n' + text
! container.set_payload(text)
Index: Utils.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/Utils.py,v
retrieving revision 1.9.6.1
retrieving revision 1.9.6.2
diff -C2 -d -r1.9.6.1 -r1.9.6.2
*** Utils.py 4 Oct 2002 17:24:24 -0000 1.9.6.1
--- Utils.py 21 Mar 2003 21:09:31 -0000 1.9.6.2
***************
*** 14,24 ****
from types import ListType
! from rfc822 import quote
! from rfc822 import AddressList as _AddressList
! from rfc822 import mktime_tz
# We need workarounds for bugs in these methods in older Pythons (see below)
! from rfc822 import parsedate as _parsedate
! from rfc822 import parsedate_tz as _parsedate_tz
try:
--- 14,24 ----
from types import ListType
! from email._parseaddr import quote
! from email._parseaddr import AddressList as _AddressList
! from email._parseaddr import mktime_tz
# We need workarounds for bugs in these methods in older Pythons (see below)
! from email._parseaddr import parsedate as _parsedate
! from email._parseaddr import parsedate_tz as _parsedate_tz
try:
***************
*** 55,60 ****
CRLF = '\r\n'
! specialsre = re.compile(r'[][\()<>@,:;".]')
! escapesre = re.compile(r'[][\()"]')
--- 55,60 ----
CRLF = '\r\n'
! specialsre = re.compile(r'[][\\()<>@,:;".]')
! escapesre = re.compile(r'[][\\()"]')
***************
*** 67,72 ****
def _bdecode(s):
- if not s:
- return s
# We can't quite use base64.encodestring() since it tacks on a "courtesy
# newline". Blech!
--- 67,70 ----
***************
*** 281,287 ****
"""Decode string according to RFC 2231"""
import urllib
! charset, language, s = s.split("'", 2)
! s = urllib.unquote(s)
! return charset, language, s
--- 279,287 ----
"""Decode string according to RFC 2231"""
import urllib
! parts = s.split("'", 2)
! if len(parts) == 1:
! return None, None, s
! charset, language, s = parts
! return charset, language, urllib.unquote(s)
***************
*** 336,340 ****
value.append(continuation)
charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
! new_params.append((name,
! (charset, language, '"%s"' % quote(value))))
return new_params
--- 336,340 ----
value.append(continuation)
charset, language, value = decode_rfc2231(EMPTYSTRING.join(value))
! new_params.append(
! (name, (charset, language, '"%s"' % quote(value))))
return new_params
Index: __init__.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/__init__.py,v
retrieving revision 1.4.10.4
retrieving revision 1.4.10.5
diff -C2 -d -r1.4.10.4 -r1.4.10.5
*** __init__.py 14 Oct 2002 17:26:02 -0000 1.4.10.4
--- __init__.py 21 Mar 2003 21:09:31 -0000 1.4.10.5
***************
*** 5,9 ****
"""
! __version__ = '2.4.3'
__all__ = [
--- 5,9 ----
"""
! __version__ = '2.5'
__all__ = [
Index: _compat21.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/_compat21.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** _compat21.py 4 Oct 2002 17:24:24 -0000 1.4.2.1
--- _compat21.py 21 Mar 2003 21:09:31 -0000 1.4.2.2
***************
*** 8,11 ****
--- 8,14 ----
from types import StringType, UnicodeType
+ False = 0
+ True = 1
+
***************
*** 32,36 ****
def _isstring(obj):
! return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
--- 35,39 ----
def _isstring(obj):
! return isinstance(obj, StringType) or isinstance(obj, UnicodeType)
***************
*** 38,46 ****
# These two functions are imported into the Iterators.py interface module.
# The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg):
! """Iterate over the parts, returning string payloads line-by-line."""
lines = []
for subpart in msg.walk():
! payload = subpart.get_payload()
if _isstring(payload):
for line in StringIO(payload).readlines():
--- 41,52 ----
# These two functions are imported into the Iterators.py interface module.
# The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg, decode=False):
! """Iterate over the parts, returning string payloads line-by-line.
!
! Optional decode (default False) is passed through to .get_payload().
! """
lines = []
for subpart in msg.walk():
! payload = subpart.get_payload(decode=decode)
if _isstring(payload):
for line in StringIO(payload).readlines():
Index: _compat22.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/_compat22.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** _compat22.py 4 Oct 2002 17:24:24 -0000 1.4.2.1
--- _compat22.py 21 Mar 2003 21:09:31 -0000 1.4.2.2
***************
*** 39,46 ****
# These two functions are imported into the Iterators.py interface module.
# The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg):
! """Iterate over the parts, returning string payloads line-by-line."""
for subpart in msg.walk():
! payload = subpart.get_payload()
if _isstring(payload):
for line in StringIO(payload):
--- 39,49 ----
# These two functions are imported into the Iterators.py interface module.
# The Python 2.2 version uses generators for efficiency.
! def body_line_iterator(msg, decode=False):
! """Iterate over the parts, returning string payloads line-by-line.
!
! Optional decode (default False) is passed through to .get_payload().
! """
for subpart in msg.walk():
! payload = subpart.get_payload(decode=decode)
if _isstring(payload):
for line in StringIO(payload):
Index: base64MIME.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/base64MIME.py,v
retrieving revision 1.5.2.1
retrieving revision 1.5.2.2
diff -C2 -d -r1.5.2.1 -r1.5.2.2
*** base64MIME.py 4 Oct 2002 17:24:24 -0000 1.5.2.1
--- base64MIME.py 21 Mar 2003 21:09:31 -0000 1.5.2.2
***************
*** 103,109 ****
max_unencoded = _floordiv(max_encoded * 3, 4)
- # BAW: Ben's original code used a step of max_unencoded, but I think it
- # ought to be max_encoded. Otherwise, where's max_encoded used? I'm
- # still not sure what the
for i in range(0, len(header), max_unencoded):
base64ed.append(b2a_base64(header[i:i+max_unencoded]))
--- 103,106 ----
Index: quopriMIME.py
===================================================================
RCS file: /cvsroot/python/python/dist/src/Lib/email/quopriMIME.py,v
retrieving revision 1.4.2.1
retrieving revision 1.4.2.2
diff -C2 -d -r1.4.2.1 -r1.4.2.2
*** quopriMIME.py 4 Oct 2002 17:24:24 -0000 1.4.2.1
--- quopriMIME.py 21 Mar 2003 21:09:31 -0000 1.4.2.2
***************
*** 83,87 ****
if not L:
L.append(s.lstrip())
! elif len(L[-1]) + len(s) < maxlen:
L[-1] += extra + s
else:
--- 83,87 ----
if not L:
L.append(s.lstrip())
! elif len(L[-1]) + len(s) <= maxlen:
L[-1] += extra + s
else:
***************
*** 117,121 ****
with each line wrapped safely at, at most, maxlinelen characters (defaults
! to 76 characters).
End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
--- 117,122 ----
with each line wrapped safely at, at most, maxlinelen characters (defaults
! to 76 characters). If maxlinelen is None, the entire string is encoded in
! one chunk with no splitting.
End-of-line characters (\\r, \\n, \\r\\n) will be automatically converted
***************
*** 135,141 ****
# Quopri encode each line, in encoded chunks no greater than maxlinelen in
! # lenght, after the RFC chrome is added in.
quoted = []
! max_encoded = maxlinelen - len(charset) - MISC_LEN
for c in header:
--- 136,146 ----
# Quopri encode each line, in encoded chunks no greater than maxlinelen in
! # length, after the RFC chrome is added in.
quoted = []
! if maxlinelen is None:
! # An obnoxiously large number that's good enough
! max_encoded = 100000
! else:
! max_encoded = maxlinelen - len(charset) - MISC_LEN - 1
for c in header: