[Python-checkins] r82924 - in python/branches/release27-maint: Lib/email/feedparser.py Lib/email/test/test_email.py Misc/NEWS
r.david.murray
python-checkins at python.org
Sat Jul 17 03:35:16 CEST 2010
Author: r.david.murray
Date: Sat Jul 17 03:35:16 2010
New Revision: 82924
Log:
Merged revisions 82922 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/branches/py3k
........
r82922 | r.david.murray | 2010-07-16 21:19:57 -0400 (Fri, 16 Jul 2010) | 4 lines
#1555570: correctly handle a \r\n that is split by the read buffer.
Patch and test by Tony Nelson.
........
Modified:
python/branches/release27-maint/ (props changed)
python/branches/release27-maint/Lib/email/feedparser.py
python/branches/release27-maint/Lib/email/test/test_email.py
python/branches/release27-maint/Misc/NEWS
Modified: python/branches/release27-maint/Lib/email/feedparser.py
==============================================================================
--- python/branches/release27-maint/Lib/email/feedparser.py (original)
+++ python/branches/release27-maint/Lib/email/feedparser.py Sat Jul 17 03:35:16 2010
@@ -104,6 +104,10 @@
         # data after the final RE. In the case of a NL/CR terminated string,
         # this is the empty string.
         self._partial = parts.pop()
+        #GAN 29Mar09 bugs 1555570, 1721862 Confusion at 8K boundary ending with \r:
+        # is there a \n to follow later?
+        if not self._partial and parts and parts[-1].endswith('\r'):
+            self._partial = parts.pop(-2)+parts.pop()
         # parts is a list of strings, alternating between the line contents
         # and the eol character(s). Gather up a list of lines after
         # re-attaching the newlines.
Modified: python/branches/release27-maint/Lib/email/test/test_email.py
==============================================================================
--- python/branches/release27-maint/Lib/email/test/test_email.py (original)
+++ python/branches/release27-maint/Lib/email/test/test_email.py Sat Jul 17 03:35:16 2010
@@ -2465,6 +2465,39 @@
-Me
""")
+    def test_pushCR_LF(self):
+        '''FeedParser BufferedSubFile.push() assumed it received complete
+        line endings. A CR ending one push() followed by a LF starting
+        the next push() added an empty line.
+        '''
+        imt = [
+            ("a\r \n", 2),
+            ("b", 0),
+            ("c\n", 1),
+            ("", 0),
+            ("d\r\n", 1),
+            ("e\r", 0),
+            ("\nf", 1),
+            ("\r\n", 1),
+        ]
+        from email.feedparser import BufferedSubFile, NeedMoreData
+        bsf = BufferedSubFile()
+        om = []
+        nt = 0
+        for il, n in imt:
+            bsf.push(il)
+            nt += n
+            n1 = 0
+            while True:
+                ol = bsf.readline()
+                if ol == NeedMoreData:
+                    break
+                om.append(ol)
+                n1 += 1
+            self.assertTrue(n == n1)
+        self.assertTrue(len(om) == nt)
+        self.assertTrue(''.join([il for il, n in imt]) == ''.join(om))
+
class TestParsers(TestEmailBase):
Modified: python/branches/release27-maint/Misc/NEWS
==============================================================================
--- python/branches/release27-maint/Misc/NEWS (original)
+++ python/branches/release27-maint/Misc/NEWS Sat Jul 17 03:35:16 2010
@@ -833,6 +833,8 @@
Library
-------
+- Issue #1555570: email no longer inserts extra blank lines when a \r\n
+ combo crosses an 8192 byte boundary.
- Issue #6906: Tk should not set Unicode environment variables on Windows.
More information about the Python-checkins
mailing list