[Python-checkins] r68505 - in sandbox/trunk/io-c: _textio.c test_io.py

antoine.pitrou python-checkins at python.org
Sat Jan 10 21:59:11 CET 2009


Author: antoine.pitrou
Date: Sat Jan 10 21:59:11 2009
New Revision: 68505

Log:
Fix readline() bug on NUL chars (Py_UNICODE_strchr stops at the first NUL character!)



Modified:
   sandbox/trunk/io-c/_textio.c
   sandbox/trunk/io-c/test_io.py

Modified: sandbox/trunk/io-c/_textio.c
==============================================================================
--- sandbox/trunk/io-c/_textio.c	(original)
+++ sandbox/trunk/io-c/_textio.c	Sat Jan 10 21:59:11 2009
@@ -924,6 +924,29 @@
     return NULL;
 }
 
+static Py_UNICODE *
+find_LF(Py_UNICODE *start, Py_UNICODE *end)
+{
+    Py_UNICODE *s = start;
+    while (s < end) {
+        if (*s == '\n')
+            return s;
+        s++;
+    }
+    return NULL;
+}
+
+static Py_UNICODE *
+find_CR(Py_UNICODE *start, Py_UNICODE *end)
+{
+    Py_UNICODE *s = start;
+    while (s < end) {
+        if (*s == '\r')
+            return s;
+        s++;
+    }
+    return NULL;
+}
 
 static PyObject *
 _TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
@@ -948,7 +971,8 @@
         Py_UNICODE* ptr = PyUnicode_AS_UNICODE(line);
         if (self->readtranslate) {
             /* Newlines are already translated, only search for \n */
-            Py_UNICODE* pos = Py_UNICODE_strchr(ptr + start, '\n');
+            Py_UNICODE *pos = find_LF(ptr + start,
+                                      ptr + PyUnicode_GET_SIZE(line));
             if (pos != NULL) {
                 endpos = pos - ptr + 1;
                 break;
@@ -964,8 +988,10 @@
             /* In C we'd look for these in parallel of course.
              * XXX Hey!
              */
-            Py_UNICODE* nlpos = Py_UNICODE_strchr(ptr + start, '\n');
-            Py_UNICODE* crpos = Py_UNICODE_strchr(ptr + start, '\r');
+            Py_UNICODE* nlpos = find_LF(ptr + start,
+                                        ptr + PyUnicode_GET_SIZE(line));
+            Py_UNICODE* crpos = find_CR(ptr + start,
+                                        ptr + PyUnicode_GET_SIZE(line));
             if (crpos == NULL) {
                 if (nlpos == NULL) {
                     /* Nothing found */

Modified: sandbox/trunk/io-c/test_io.py
==============================================================================
--- sandbox/trunk/io-c/test_io.py	(original)
+++ sandbox/trunk/io-c/test_io.py	Sat Jan 10 21:59:11 2009
@@ -232,14 +232,15 @@
 
     def test_readline(self):
         f = io.open(support.TESTFN, "wb")
-        f.write(b"abc\ndef\nxyzzy\nfoo")
+        f.write(b"abc\ndef\nxyzzy\nfoo\x00bar\nanother line")
         f.close()
         f = io.open(support.TESTFN, "rb")
         self.assertEqual(f.readline(), b"abc\n")
         self.assertEqual(f.readline(10), b"def\n")
         self.assertEqual(f.readline(2), b"xy")
         self.assertEqual(f.readline(4), b"zzy\n")
-        self.assertEqual(f.readline(), b"foo")
+        self.assertEqual(f.readline(), b"foo\x00bar\n")
+        self.assertEqual(f.readline(), b"another line")
         f.close()
 
     def test_raw_bytes_io(self):
@@ -1226,14 +1227,14 @@
                         self.assertEquals(len(got_lines), len(exp_lines))
 
     def testNewlinesInput(self):
-        testdata = b"AAA\nBBB\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
+        testdata = b"AAA\nBB\x00B\nCCC\rDDD\rEEE\r\nFFF\r\nGGG"
         normalized = testdata.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
         for newline, expected in [
             (None, normalized.decode("ascii").splitlines(True)),
             ("", testdata.decode("ascii").splitlines(True)),
-            ("\n", ["AAA\n", "BBB\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
-            ("\r\n", ["AAA\nBBB\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
-            ("\r",  ["AAA\nBBB\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
+            ("\n", ["AAA\n", "BB\x00B\n", "CCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
+            ("\r\n", ["AAA\nBB\x00B\nCCC\rDDD\rEEE\r\n", "FFF\r\n", "GGG"]),
+            ("\r",  ["AAA\nBB\x00B\nCCC\r", "DDD\r", "EEE\r", "\nFFF\r", "\nGGG"]),
             ]:
             buf = io.BytesIO(testdata)
             txt = io.TextIOWrapper(buf, encoding="ascii", newline=newline)


More information about the Python-checkins mailing list