[Python-3000-checkins] r59056 - in python/branches/py3k: Doc/library/mimetypes.rst Lib/encodings/utf_8_sig.py Lib/test/test_cmd_line_script.py Lib/test/test_codecs.py
guido.van.rossum
python-3000-checkins at python.org
Mon Nov 19 19:03:45 CET 2007
Author: guido.van.rossum
Date: Mon Nov 19 19:03:44 2007
New Revision: 59056
Modified:
python/branches/py3k/ (props changed)
python/branches/py3k/Doc/library/mimetypes.rst
python/branches/py3k/Lib/encodings/utf_8_sig.py
python/branches/py3k/Lib/test/test_cmd_line_script.py
python/branches/py3k/Lib/test/test_codecs.py
Log:
Merged revisions 59041-59055 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r59044 | neal.norwitz | 2007-11-18 17:46:20 -0800 (Sun, 18 Nov 2007) | 1 line
Use a slightly more recent version than 1.5.2b2.
........
r59047 | walter.doerwald | 2007-11-19 04:14:05 -0800 (Mon, 19 Nov 2007) | 2 lines
Fix typo in comment.
........
r59049 | walter.doerwald | 2007-11-19 04:41:10 -0800 (Mon, 19 Nov 2007) | 4 lines
Fix for #1444: utf_8_sig.StreamReader was (indirectly through decode())
calling codecs.utf_8_decode() with final==True, which failed with incomplete
byte sequences. Fix and test by James G. Sack.
........
r59051 | nick.coghlan | 2007-11-19 05:56:27 -0800 (Mon, 19 Nov 2007) | 1 line
Enable some test_cmd_line_script debugging output to investigate failure on Mac OSX buildbot
........
r59053 | facundo.batista | 2007-11-19 08:30:24 -0800 (Mon, 19 Nov 2007) | 3 lines
Fixed detail in add_type() explanation (issue 1463).
........
r59054 | guido.van.rossum | 2007-11-19 09:35:24 -0800 (Mon, 19 Nov 2007) | 2 lines
Make this work stand-alone, too.
........
r59055 | guido.van.rossum | 2007-11-19 09:50:22 -0800 (Mon, 19 Nov 2007) | 3 lines
Fix the OSX failures in this test -- they were due to /tmp being a symlink
to /private/tmp. Adding a call to os.path.realpath() to temp_dir() fixed it.
........
Modified: python/branches/py3k/Doc/library/mimetypes.rst
==============================================================================
--- python/branches/py3k/Doc/library/mimetypes.rst (original)
+++ python/branches/py3k/Doc/library/mimetypes.rst Mon Nov 19 19:03:44 2007
@@ -96,8 +96,8 @@
extension is already known, the new type will replace the old one. When the type
is already known the extension will be added to the list of known extensions.
- When *strict* is the mapping will added to the official MIME types, otherwise to
- the non-standard ones.
+ When *strict* is True (the default), the mapping will be added to the official MIME
+ types, otherwise to the non-standard ones.
.. data:: inited
Modified: python/branches/py3k/Lib/encodings/utf_8_sig.py
==============================================================================
--- python/branches/py3k/Lib/encodings/utf_8_sig.py (original)
+++ python/branches/py3k/Lib/encodings/utf_8_sig.py Mon Nov 19 19:03:44 2007
@@ -103,12 +103,18 @@
pass
def decode(self, input, errors='strict'):
- if len(input) < 3 and codecs.BOM_UTF8.startswith(input):
- # not enough data to decide if this is a BOM
- # => try again on the next call
- return ("", 0)
+ if len(input) < 3:
+ if codecs.BOM_UTF8.startswith(input):
+ # not enough data to decide if this is a BOM
+ # => try again on the next call
+ return ("", 0)
+ elif input[:3] == codecs.BOM_UTF8:
+ self.decode = codecs.utf_8_decode
+ (output, consumed) = codecs.utf_8_decode(input[3:],errors)
+ return (output, consumed+3)
+ # (else) no BOM present
self.decode = codecs.utf_8_decode
- return decode(input, errors)
+ return codecs.utf_8_decode(input, errors)
### encodings module API
Modified: python/branches/py3k/Lib/test/test_cmd_line_script.py
==============================================================================
--- python/branches/py3k/Lib/test/test_cmd_line_script.py (original)
+++ python/branches/py3k/Lib/test/test_cmd_line_script.py Mon Nov 19 19:03:44 2007
@@ -29,6 +29,7 @@
@contextlib.contextmanager
def temp_dir():
dirname = tempfile.mkdtemp()
+ dirname = os.path.realpath(dirname)
try:
yield dirname
finally:
@@ -82,7 +83,7 @@
zip_file.close()
# if verbose:
# zip_file = zipfile.ZipFile(zip_name, 'r')
- # print "Contents of %r:" % zip_name
+ # print("Contents of %r:" % zip_name)
# zip_file.printdir()
# zip_file.close()
return zip_name
@@ -90,9 +91,9 @@
class CmdLineTest(unittest.TestCase):
def _check_script(self, script_name, expected_file, expected_argv0):
exit_code, data = _run_python(script_name)
- # if verbose:
- # print "Output from test script %r:" % script_name
- # print data
+ if verbose:
+ print("Output from test script %r:" % script_name)
+ print(data)
self.assertEqual(exit_code, 0, data)
printed_file = '__file__==%r' % expected_file
printed_argv0 = 'sys.argv[0]==%r' % expected_argv0
Modified: python/branches/py3k/Lib/test/test_codecs.py
==============================================================================
--- python/branches/py3k/Lib/test/test_codecs.py (original)
+++ python/branches/py3k/Lib/test/test_codecs.py Mon Nov 19 19:03:44 2007
@@ -59,7 +59,7 @@
class ReadTest(unittest.TestCase, MixInCheckStateHandling):
def check_partial(self, input, partialresults):
# get a StreamReader for the encoding and feed the bytestring version
- # of input to the reader byte by byte. Read every available from
+ # of input to the reader byte by byte. Read everything available from
# the StreamReader and check that the results equal the appropriate
# entries from partialresults.
q = Queue(b"")
@@ -618,10 +618,53 @@
s = "spam"
self.assertEqual(d.decode(s.encode("utf-8-sig")), s)
- def test_decoder_state(self):
- u = "\x00\x7f\x80\xff\u0100\u07ff\u0800\uffff\U0010ffff"
- self.check_state_handling_decode(self.encoding,
- u, u.encode(self.encoding))
+ def test_stream_bom(self):
+ unistring = "ABC\u00A1\u2200XYZ"
+ bytestring = codecs.BOM_UTF8 + b"ABC\xC2\xA1\xE2\x88\x80XYZ"
+
+ reader = codecs.getreader("utf-8-sig")
+ for sizehint in [None] + list(range(1, 11)) + \
+ [64, 128, 256, 512, 1024]:
+ istream = reader(io.BytesIO(bytestring))
+ ostream = io.StringIO()
+ while 1:
+ if sizehint is not None:
+ data = istream.read(sizehint)
+ else:
+ data = istream.read()
+
+ if not data:
+ break
+ ostream.write(data)
+
+ got = ostream.getvalue()
+ self.assertEqual(got, unistring)
+
+ def test_stream_bare(self):
+ unistring = "ABC\u00A1\u2200XYZ"
+ bytestring = b"ABC\xC2\xA1\xE2\x88\x80XYZ"
+
+ reader = codecs.getreader("utf-8-sig")
+ for sizehint in [None] + list(range(1, 11)) + \
+ [64, 128, 256, 512, 1024]:
+ istream = reader(io.BytesIO(bytestring))
+ ostream = io.StringIO()
+ while 1:
+ if sizehint is not None:
+ data = istream.read(sizehint)
+ else:
+ data = istream.read()
+
+ if not data:
+ break
+ ostream.write(data)
+
+ got = ostream.getvalue()
+ self.assertEqual(got, unistring)
+
+class EscapeDecodeTest(unittest.TestCase):
+ def test_empty(self):
+ self.assertEquals(codecs.escape_decode(""), ("", 0))
class RecodingTest(unittest.TestCase):
def test_recoding(self):
More information about the Python-3000-checkins
mailing list