[Python-checkins] cpython: Issue #21057: TextIOWrapper now allows the underlying binary stream's read() or read1() method to return an arbitrary bytes-like object

antoine.pitrou python-checkins at python.org
Tue Apr 29 10:27:26 CEST 2014


http://hg.python.org/cpython/rev/2a1d63f09560
changeset:   90508:2a1d63f09560
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Tue Apr 29 10:14:02 2014 +0200
summary:
  Issue #21057: TextIOWrapper now allows the underlying binary stream's read() or read1() method to return an arbitrary bytes-like object (such as a memoryview).

Patch by Nikolaus Rath.

files:
  Lib/test/test_io.py  |  28 ++++++++++++++++++++++++++
  Misc/NEWS            |   4 +++
  Modules/_io/textio.c |  34 ++++++++++++++++++-------------
  3 files changed, 52 insertions(+), 14 deletions(-)


diff --git a/Lib/test/test_io.py b/Lib/test/test_io.py
--- a/Lib/test/test_io.py
+++ b/Lib/test/test_io.py
@@ -2681,6 +2681,34 @@
         self.assertFalse(err)
         self.assertEqual("ok", out.decode().strip())
 
+    def test_read_byteslike(self):
+        r = MemviewBytesIO(b'Just some random string\n')
+        t = self.TextIOWrapper(r, 'utf-8')
+
+        # TextIOWrapper will not read the full string, because
+        # we truncate it to a multiple of the native int size
+        # so that we can construct a more complex memoryview.
+        bytes_val =  _to_memoryview(r.getvalue()).tobytes()
+
+        self.assertEqual(t.read(200), bytes_val.decode('utf-8'))
+
+class MemviewBytesIO(io.BytesIO):
+    '''A BytesIO object whose read method returns memoryviews
+       rather than bytes'''
+
+    def read1(self, len_):
+        return _to_memoryview(super().read1(len_))
+
+    def read(self, len_):
+        return _to_memoryview(super().read(len_))
+
+def _to_memoryview(buf):
+    '''Convert bytes-object *buf* to a non-trivial memoryview'''
+
+    arr = array.array('i')
+    idx = len(buf) - len(buf) % arr.itemsize
+    arr.frombytes(buf[:idx])
+    return memoryview(arr)
 
 class CTextIOWrapperTest(TextIOWrapperTest):
     io = io
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -60,6 +60,10 @@
 Library
 -------
 
+- Issue #21057: TextIOWrapper now allows the underlying binary stream's
+  read() or read1() method to return an arbitrary bytes-like object
+  (such as a memoryview).  Patch by Nikolaus Rath.
+
 - Issue #20951: SSLSocket.send() now raises either SSLWantReadError or
   SSLWantWriteError on a non-blocking socket if the operation would block.
   Previously, it would return 0.  Patch by Nikolaus Rath.
diff --git a/Modules/_io/textio.c b/Modules/_io/textio.c
--- a/Modules/_io/textio.c
+++ b/Modules/_io/textio.c
@@ -1439,6 +1439,7 @@
     PyObject *dec_buffer = NULL;
     PyObject *dec_flags = NULL;
     PyObject *input_chunk = NULL;
+    Py_buffer input_chunk_buf;
     PyObject *decoded_chars, *chunk_size;
     Py_ssize_t nbytes, nchars;
     int eof;
@@ -1470,6 +1471,15 @@
             Py_DECREF(state);
             return -1;
         }
+
+        if (!PyBytes_Check(dec_buffer)) {
+            PyErr_Format(PyExc_TypeError,
+                         "decoder getstate() should have returned a bytes "
+                         "object, not '%.200s'",
+                         Py_TYPE(dec_buffer)->tp_name);
+            Py_DECREF(state);
+            return -1;
+        }
         Py_INCREF(dec_buffer);
         Py_INCREF(dec_flags);
         Py_DECREF(state);
@@ -1482,23 +1492,24 @@
     chunk_size = PyLong_FromSsize_t(Py_MAX(self->chunk_size, size_hint));
     if (chunk_size == NULL)
         goto fail;
+
     input_chunk = PyObject_CallMethodObjArgs(self->buffer,
         (self->has_read1 ? _PyIO_str_read1: _PyIO_str_read),
         chunk_size, NULL);
     Py_DECREF(chunk_size);
     if (input_chunk == NULL)
         goto fail;
-    if (!PyBytes_Check(input_chunk)) {
+
+    if (PyObject_GetBuffer(input_chunk, &input_chunk_buf, 0) != 0) {
         PyErr_Format(PyExc_TypeError,
-                     "underlying %s() should have returned a bytes object, "
+                     "underlying %s() should have returned a bytes-like object, "
                      "not '%.200s'", (self->has_read1 ? "read1": "read"),
                      Py_TYPE(input_chunk)->tp_name);
         goto fail;
     }
 
-    nbytes = PyBytes_Size(input_chunk);
+    nbytes = input_chunk_buf.len;
     eof = (nbytes == 0);
-
     if (Py_TYPE(self->decoder) == &PyIncrementalNewlineDecoder_Type) {
         decoded_chars = _PyIncrementalNewlineDecoder_decode(
             self->decoder, input_chunk, eof);
@@ -1507,6 +1518,7 @@
         decoded_chars = PyObject_CallMethodObjArgs(self->decoder,
             _PyIO_str_decode, input_chunk, eof ? Py_True : Py_False, NULL);
     }
+    PyBuffer_Release(&input_chunk_buf);
 
     if (check_decoded(decoded_chars) < 0)
         goto fail;
@@ -1523,18 +1535,12 @@
         /* At the snapshot point, len(dec_buffer) bytes before the read, the
          * next input to be decoded is dec_buffer + input_chunk.
          */
-        PyObject *next_input = PyNumber_Add(dec_buffer, input_chunk);
-        if (next_input == NULL)
-            goto fail;
-        if (!PyBytes_Check(next_input)) {
-            PyErr_Format(PyExc_TypeError,
-                         "decoder getstate() should have returned a bytes "
-                         "object, not '%.200s'",
-                         Py_TYPE(next_input)->tp_name);
-            Py_DECREF(next_input);
+        PyObject *next_input = dec_buffer;
+        PyBytes_Concat(&next_input, input_chunk);
+        if (next_input == NULL) {
+            dec_buffer = NULL; /* Reference lost to PyBytes_Concat */
             goto fail;
         }
-        Py_DECREF(dec_buffer);
         Py_CLEAR(self->snapshot);
         self->snapshot = Py_BuildValue("NN", dec_flags, next_input);
     }

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list