[Python-checkins] r55770 - in python/trunk: Lib/test/test_multibytecodec.py Misc/NEWS Modules/cjkcodecs/multibytecodec.c

hyeshik.chang python-checkins at python.org
Tue Jun 5 20:58:56 CEST 2007


Author: hyeshik.chang
Date: Tue Jun  5 20:58:51 2007
New Revision: 55770

Modified:
   python/trunk/Lib/test/test_multibytecodec.py
   python/trunk/Misc/NEWS
   python/trunk/Modules/cjkcodecs/multibytecodec.c
Log:
Bug #1728403: Fix a bug where the CJKCodecs StreamReader hangs when it
reads a file that ends with an incomplete sequence and the sizehint
argument for .read() is specified.


Modified: python/trunk/Lib/test/test_multibytecodec.py
==============================================================================
--- python/trunk/Lib/test/test_multibytecodec.py	(original)
+++ python/trunk/Lib/test/test_multibytecodec.py	Tue Jun  5 20:58:51 2007
@@ -136,11 +136,19 @@
         self.assertRaises(UnicodeDecodeError, decoder.decode, '', True)
         self.assertEqual(decoder.decode('B@$'), u'\u4e16')
 
+class Test_StreamReader(unittest.TestCase):
+    def test_bug1728403(self):
+        try:
+            open(TESTFN, 'w').write('\xa1')
+            f = codecs.open(TESTFN, encoding='cp949')
+            self.assertRaises(UnicodeDecodeError, f.read, 2)
+        finally:
+            os.unlink(TESTFN)
 
 class Test_StreamWriter(unittest.TestCase):
     if len(u'\U00012345') == 2: # UCS2
         def test_gb18030(self):
-            s= StringIO.StringIO()
+            s = StringIO.StringIO()
             c = codecs.getwriter('gb18030')(s)
             c.write(u'123')
             self.assertEqual(s.getvalue(), '123')

Modified: python/trunk/Misc/NEWS
==============================================================================
--- python/trunk/Misc/NEWS	(original)
+++ python/trunk/Misc/NEWS	Tue Jun  5 20:58:51 2007
@@ -222,6 +222,10 @@
 Library
 -------
 
+- Bug #1728403: Fix a bug that CJKCodecs StreamReader hangs when it
+  reads a file that ends with incomplete sequence and sizehint argument
+  for .read() is specified.
+
 - Bug #1730389: Change time.strptime() to use ``\s+`` instead of ``\s*`` when
   matching spaces in the specified format argument.
 

Modified: python/trunk/Modules/cjkcodecs/multibytecodec.c
==============================================================================
--- python/trunk/Modules/cjkcodecs/multibytecodec.c	(original)
+++ python/trunk/Modules/cjkcodecs/multibytecodec.c	Tue Jun  5 20:58:51 2007
@@ -1214,6 +1214,8 @@
 	cres = NULL;
 
 	for (;;) {
+		int endoffile;
+
 		if (sizehint < 0)
 			cres = PyObject_CallMethod(self->stream,
 					(char *)method, NULL);
@@ -1230,6 +1232,8 @@
 			goto errorexit;
 		}
 
+		endoffile = (PyString_GET_SIZE(cres) == 0);
+
 		if (self->pendingsize > 0) {
 			PyObject *ctr;
 			char *ctrdata;
@@ -1257,7 +1261,7 @@
 				(MultibyteStatefulDecoderContext *)self, &buf))
 			goto errorexit;
 
-		if (rsize == 0 || sizehint < 0) { /* end of file */
+		if (endoffile || sizehint < 0) {
 			if (buf.inbuf < buf.inbuf_end &&
 			    multibytecodec_decerror(self->codec, &self->state,
 					&buf, self->errors, MBERR_TOOFEW))


More information about the Python-checkins mailing list