[Python-checkins] r69839 - in python/branches/io-c: Lib/io.py Lib/test/test_memoryio.py Modules/_stringio.c Modules/io.c

antoine.pitrou python-checkins at python.org
Sat Feb 21 19:54:01 CET 2009


Author: antoine.pitrou
Date: Sat Feb 21 19:54:01 2009
New Revision: 69839

Log:
StringIO is now written entirely in C (and blazingly fast)



Modified:
   python/branches/io-c/Lib/io.py
   python/branches/io-c/Lib/test/test_memoryio.py
   python/branches/io-c/Modules/_stringio.c
   python/branches/io-c/Modules/io.c

Modified: python/branches/io-c/Lib/io.py
==============================================================================
--- python/branches/io-c/Lib/io.py	(original)
+++ python/branches/io-c/Lib/io.py	Sat Feb 21 19:54:01 2009
@@ -1847,6 +1847,7 @@
     def newlines(self):
         return self._decoder.newlines if self._decoder else None
 
+StringIO = _io.StringIO
 class unused_StringIO(unused_TextIOWrapper):
     """Text I/O implementation using an in-memory buffer.
 
@@ -1871,101 +1872,6 @@
         self.flush()
         return self.buffer.getvalue().decode(self._encoding, self._errors)
 
-try:
-    class StringIO(_io._StringIO, TextIOBase):
-        """Text I/O implementation using an in-memory buffer.
-
-        The initial_value argument sets the value of object.  The newline
-        argument is like the one of TextIOWrapper's constructor.
-        """
-
-        _read = _io._StringIO.read
-        _readline = _io._StringIO.readline
-        _write = _io._StringIO.write
-        _tell = _io._StringIO.tell
-        _seek = _io._StringIO.seek
-        _truncate = _io._StringIO.truncate
-        _getvalue = _io._StringIO.getvalue
-
-        def getvalue(self) -> str:
-            """Retrieve the entire contents of the object."""
-            if self.closed:
-                raise ValueError("read on closed file")
-            return self._getvalue()
-
-        def write(self, s: str) -> int:
-            """Write string s to file.
-
-            Returns the number of characters written.
-            """
-            if self.closed:
-                raise ValueError("write to closed file")
-            return self._write(s)
-
-        def read(self, n: int = None) -> str:
-            """Read at most n characters, returned as a string.
-
-            If the argument is negative or omitted, read until EOF
-            is reached. Return an empty string at EOF.
-            """
-            if self.closed:
-                raise ValueError("read to closed file")
-            return self._read(n)
-
-        def tell(self) -> int:
-            """Tell the current file position."""
-            if self.closed:
-                raise ValueError("tell from closed file")
-            return self._tell()
-
-        def seek(self, pos: int = None, whence: int = 0) -> int:
-            """Change stream position.
-
-            Seek to character offset pos relative to position indicated by whence:
-                0  Start of stream (the default).  pos should be >= 0;
-                1  Current position - pos must be 0;
-                2  End of stream - pos must be 0.
-            Returns the new absolute position.
-            """
-            if self.closed:
-                raise ValueError("seek from closed file")
-            return self._seek(pos, whence)
-
-        def truncate(self, pos: int = None) -> int:
-            """Truncate size to pos.
-
-            The pos argument defaults to the current file position, as
-            returned by tell().  Imply an absolute seek to pos.
-            Returns the new absolute position.
-            """
-            if self.closed:
-                raise ValueError("truncate from closed file")
-            return self._truncate(pos)
-
-        def readline(self, limit: int = None) -> str:
-            if self.closed:
-                raise ValueError("read from closed file")
-            return self._readline(limit)
-
-        _LF = 1
-        _CR = 2
-        _CRLF = 4
-
-        @property
-        def newlines(self):
-            return (None,
-                    "\n",
-                    "\r",
-                    ("\r", "\n"),
-                    "\r\n",
-                    ("\n", "\r\n"),
-                    ("\r", "\r\n"),
-                    ("\r", "\n", "\r\n")
-                   )[self._seennl]
-
-
-except ImportError:
-    StringIO = _StringIO
 
 # make test_memoryio happy!
 _BytesIO = BytesIO

Modified: python/branches/io-c/Lib/test/test_memoryio.py
==============================================================================
--- python/branches/io-c/Lib/test/test_memoryio.py	(original)
+++ python/branches/io-c/Lib/test/test_memoryio.py	Sat Feb 21 19:54:01 2009
@@ -391,7 +391,7 @@
         self.assertEqual(memio.errors, "strict")
         self.assertEqual(memio.line_buffering, False)
 
-    def test_newlines_none(self):
+    def test_newline_none(self):
         # newline=None
         memio = self.ioclass("a\nb\r\nc\rd", newline=None)
         self.assertEqual(list(memio), ["a\n", "b\n", "c\n", "d"])
@@ -407,7 +407,7 @@
         memio.seek(0)
         self.assertEqual(memio.read(), "a\nb\nc\nd")
 
-    def test_newlines_empty(self):
+    def test_newline_empty(self):
         # newline=""
         memio = self.ioclass("a\nb\r\nc\rd", newline="")
         self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"])
@@ -418,16 +418,17 @@
         memio = self.ioclass(newline="")
         self.assertEqual(2, memio.write("a\n"))
         self.assertEqual(2, memio.write("b\r"))
-        self.assertEqual(4, memio.write("\nc\rd"))
+        self.assertEqual(2, memio.write("\nc"))
+        self.assertEqual(2, memio.write("\rd"))
         memio.seek(0)
         self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"])
 
-    def test_newlines_lf(self):
+    def test_newline_lf(self):
         # newline="\n"
         memio = self.ioclass("a\nb\r\nc\rd")
         self.assertEqual(list(memio), ["a\n", "b\r\n", "c\rd"])
 
-    def test_newlines_cr(self):
+    def test_newline_cr(self):
         # newline="\r"
         memio = self.ioclass("a\nb\r\nc\rd", newline="\r")
         memio.seek(0)
@@ -435,7 +436,7 @@
         memio.seek(0)
         self.assertEqual(list(memio), ["a\r", "b\r", "\r", "c\r", "d"])
 
-    def test_newlines_crlf(self):
+    def test_newline_crlf(self):
         # newline="\r\n"
         memio = self.ioclass("a\nb\r\nc\rd", newline="\r\n")
         memio.seek(0)
@@ -469,6 +470,17 @@
         self.assertEqual(memio.tell(), len(buf) * 2)
         self.assertEqual(memio.getvalue(), buf + buf)
 
+    # XXX This test fails with the Python version of io.StringIO
+    def test_newlines_property(self):
+        memio = self.ioclass(newline=None)
+        self.assertEqual(memio.newlines, None)
+        memio.write("a\n")
+        self.assertEqual(memio.newlines, "\n")
+        memio.write("b\r\n")
+        self.assertEqual(memio.newlines, ("\n", "\r\n"))
+        memio.write("c\rd")
+        self.assertEqual(memio.newlines, ("\r", "\n", "\r\n"))
+
 
 def test_main():
     tests = [PyBytesIOTest, PyStringIOTest, CBytesIOTest, CStringIOTest]

Modified: python/branches/io-c/Modules/_stringio.c
==============================================================================
--- python/branches/io-c/Modules/_stringio.c	(original)
+++ python/branches/io-c/Modules/_stringio.c	Sat Feb 21 19:54:01 2009
@@ -13,12 +13,13 @@
     Py_ssize_t string_size;
     size_t buf_size;
 
-    int ok; /* initialized? */
+    char ok; /* initialized? */
+    char closed;
+    char readuniversal;
+    char readtranslate;
     PyObject *decoder;
     PyObject *readnl;
     PyObject *writenl;
-    char readuniversal;
-    char readtranslate;
 } StringIOObject;
 
 #define CHECK_INITIALIZED(self) \
@@ -28,6 +29,20 @@
         return NULL; \
     }
 
+#define CHECK_CLOSED(self) \
+    if (self->closed) { \
+        PyErr_SetString(PyExc_ValueError, \
+            "I/O operation on closed file"); \
+        return NULL; \
+    }
+
+PyDoc_STRVAR(stringio_doc,
+    "Text I/O implementation using an in-memory buffer.\n"
+    "\n"
+    "The initial_value argument sets the value of object.  The newline\n"
+    "argument is like the one of TextIOWrapper's constructor.");
+
+
 /* Internal routine for changing the size, in terms of characters, of the
    buffer of StringIO objects.  The caller should ensure that the 'size'
    argument is non-negative.  Returns 0 on success, -1 otherwise. */
@@ -163,20 +178,34 @@
     return -1;
 }
 
+PyDoc_STRVAR(stringio_getvalue_doc,
+    "Retrieve the entire contents of the object.");
+
 static PyObject *
 stringio_getvalue(StringIOObject *self)
 {
     CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
     return PyUnicode_FromUnicode(self->buf, self->string_size);
 }
 
+PyDoc_STRVAR(stringio_tell_doc,
+    "Tell the current file position.");
+
 static PyObject *
 stringio_tell(StringIOObject *self)
 {
     CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
     return PyLong_FromSsize_t(self->pos);
 }
 
+PyDoc_STRVAR(stringio_read_doc,
+    "Read at most n characters, returned as a string.\n"
+    "\n"
+    "If the argument is negative or omitted, read until EOF\n"
+    "is reached. Return an empty string at EOF.\n");
+
 static PyObject *
 stringio_read(StringIOObject *self, PyObject *args)
 {
@@ -187,6 +216,7 @@
     CHECK_INITIALIZED(self);
     if (!PyArg_ParseTuple(args, "|O:read", &arg))
         return NULL;
+    CHECK_CLOSED(self);
 
     if (PyNumber_Check(arg)) {
         size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
@@ -216,29 +246,13 @@
     return PyUnicode_FromUnicode(output, size);
 }
 
+/* Internal helper, used by stringio_readline and stringio_iternext */
 static PyObject *
-stringio_readline(StringIOObject *self, PyObject *args)
+_stringio_readline(StringIOObject *self, Py_ssize_t limit)
 {
-    PyObject *arg = Py_None;
-    Py_ssize_t limit = -1;
     Py_UNICODE *start, *end, old_char;
     Py_ssize_t len, consumed;
 
-    CHECK_INITIALIZED(self);
-    if (!PyArg_ParseTuple(args, "|O:readline", &arg))
-        return NULL;
-
-    if (PyNumber_Check(arg)) {
-        limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
-        if (limit == -1 && PyErr_Occurred())
-            return NULL;
-    }
-    else if (arg != Py_None) {
-        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
-                     Py_TYPE(arg)->tp_name);
-        return NULL;
-    }
-
     /* In case of overseek, return the empty string */
     if (self->pos >= self->string_size)
         return PyUnicode_FromString("");
@@ -262,6 +276,79 @@
     return PyUnicode_FromUnicode(start, len);
 }
 
+PyDoc_STRVAR(stringio_readline_doc,
+    "Read until newline or EOF.\n"
+    "\n"
+    "Returns an empty string if EOF is hit immediately.\n");
+
+static PyObject *
+stringio_readline(StringIOObject *self, PyObject *args)
+{
+    PyObject *arg = Py_None;
+    Py_ssize_t limit = -1;
+
+    CHECK_INITIALIZED(self);
+    if (!PyArg_ParseTuple(args, "|O:readline", &arg))
+        return NULL;
+    CHECK_CLOSED(self);
+
+    if (PyNumber_Check(arg)) {
+        limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
+        if (limit == -1 && PyErr_Occurred())
+            return NULL;
+    }
+    else if (arg != Py_None) {
+        PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
+                     Py_TYPE(arg)->tp_name);
+        return NULL;
+    }
+    return _stringio_readline(self, limit);
+}
+
+static PyObject *
+stringio_iternext(StringIOObject *self)
+{
+    PyObject *line;
+
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+
+    if (Py_TYPE(self) == &PyStringIO_Type) {
+        /* Skip method call overhead for speed */
+        line = _stringio_readline(self, -1);
+    }
+    else {
+        /* XXX is subclassing StringIO really supported? */
+        line = PyObject_CallMethodObjArgs((PyObject *)self,
+                                           _PyIO_str_readline, NULL);
+        if (line && !PyUnicode_Check(line)) {
+            PyErr_Format(PyExc_IOError,
+                         "readline() should have returned an str object, "
+                         "not '%.200s'", Py_TYPE(line)->tp_name);
+            Py_DECREF(line);
+            return NULL;
+        }
+    }
+
+    if (line == NULL)
+        return NULL;
+
+    if (PyUnicode_GET_SIZE(line) == 0) {
+        /* Reached EOF */
+        Py_DECREF(line);
+        return NULL;
+    }
+
+    return line;
+}
+
+PyDoc_STRVAR(stringio_truncate_doc,
+    "Truncate size to pos.\n"
+    "\n"
+    "The pos argument defaults to the current file position, as\n"
+    "returned by tell().  Imply an absolute seek to pos.\n"
+    "Returns the new absolute position.\n");
+
 static PyObject *
 stringio_truncate(StringIOObject *self, PyObject *args)
 {
@@ -271,6 +358,7 @@
     CHECK_INITIALIZED(self);
     if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
         return NULL;
+    CHECK_CLOSED(self);
 
     if (PyNumber_Check(arg)) {
         size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
@@ -303,6 +391,15 @@
     return PyLong_FromSsize_t(size);
 }
 
+PyDoc_STRVAR(stringio_seek_doc,
+    "Change stream position.\n"
+    "\n"
+    "Seek to character offset pos relative to position indicated by whence:\n"
+    "    0  Start of stream (the default).  pos should be >= 0;\n"
+    "    1  Current position - pos must be 0;\n"
+    "    2  End of stream - pos must be 0.\n"
+    "Returns the new absolute position.\n");
+
 static PyObject *
 stringio_seek(StringIOObject *self, PyObject *args)
 {
@@ -312,6 +409,7 @@
     CHECK_INITIALIZED(self);
     if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
         return NULL;
+    CHECK_CLOSED(self);
 
     if (mode != 0 && mode != 1 && mode != 2) {
         PyErr_Format(PyExc_ValueError,
@@ -344,6 +442,12 @@
     return PyLong_FromSsize_t(self->pos);
 }
 
+PyDoc_STRVAR(stringio_write_doc,
+    "Write string to file.\n"
+    "\n"
+    "Returns the number of characters written, which is always equal to\n"
+    "the length of the string.\n");
+
 static PyObject *
 stringio_write(StringIOObject *self, PyObject *obj)
 {
@@ -355,6 +459,7 @@
                      Py_TYPE(obj)->tp_name);
         return NULL;
     }
+    CHECK_CLOSED(self);
     size = PyUnicode_GET_SIZE(obj);
 
     if (size > 0 && write_str(self, obj) < 0)
@@ -363,13 +468,33 @@
     return PyLong_FromSsize_t(size);
 }
 
+PyDoc_STRVAR(stringio_close_doc,
+    "Close the IO object. Attempting any further operation after the\n"
+    "object is closed will raise a ValueError.\n"
+    "\n"
+    "This method has no effect if the file is already closed.\n");
+
+static PyObject *
+stringio_close(StringIOObject *self)
+{
+    self->closed = 1;
+    /* Free up some memory */
+    if (resize_buffer(self, 0) < 0)
+        return NULL;
+    Py_CLEAR(self->readnl);
+    Py_CLEAR(self->writenl);
+    Py_CLEAR(self->decoder);
+    Py_RETURN_NONE;
+}
+
 static void
 stringio_dealloc(StringIOObject *self)
 {
     Py_CLEAR(self->readnl);
     Py_CLEAR(self->writenl);
     Py_CLEAR(self->decoder);
-    PyMem_Free(self->buf);
+    if (self->buf)
+        PyMem_Free(self->buf);
     Py_TYPE(self)->tp_free(self);
 }
 
@@ -472,11 +597,12 @@
     }
     self->pos = 0;
 
-
+    self->closed = 0;
     self->ok = 1;
     return 0;
 }
 
+/* Properties and pseudo-properties */
 static PyObject *
 stringio_seekable(StringIOObject *self, PyObject *args)
 {
@@ -507,9 +633,17 @@
 }
 
 static PyObject *
+stringio_closed(StringIOObject *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+    return PyBool_FromLong(self->closed);
+}
+
+static PyObject *
 stringio_encoding(StringIOObject *self, void *context)
 {
     CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
     return PyUnicode_FromString("utf-8");
 }
 
@@ -517,6 +651,7 @@
 stringio_errors(StringIOObject *self, void *context)
 {
     CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
     return PyUnicode_FromString("strict");
 }
 
@@ -524,25 +659,39 @@
 stringio_line_buffering(StringIOObject *self, void *context)
 {
     CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
     Py_RETURN_FALSE;
 }
 
+static PyObject *
+stringio_newlines(StringIOObject *self, void *context)
+{
+    CHECK_INITIALIZED(self);
+    CHECK_CLOSED(self);
+    if (self->decoder == NULL)
+        Py_RETURN_NONE;
+    return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
+}
+
 static struct PyMethodDef stringio_methods[] = {
-    {"getvalue",   (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
-    {"read",       (PyCFunction)stringio_read,     METH_VARARGS, NULL},
-    {"readline",   (PyCFunction)stringio_readline, METH_VARARGS, NULL},
-    {"tell",       (PyCFunction)stringio_tell,     METH_NOARGS,  NULL},
-    {"truncate",   (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
-    {"seek",       (PyCFunction)stringio_seek,     METH_VARARGS, NULL},
-    {"write",      (PyCFunction)stringio_write,    METH_O,       NULL},
+    {"close",    (PyCFunction)stringio_close,    METH_NOARGS,  stringio_close_doc},
+    {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc},
+    {"read",     (PyCFunction)stringio_read,     METH_VARARGS, stringio_read_doc},
+    {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
+    {"tell",     (PyCFunction)stringio_tell,     METH_NOARGS,  stringio_tell_doc},
+    {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
+    {"seek",     (PyCFunction)stringio_seek,     METH_VARARGS, stringio_seek_doc},
+    {"write",    (PyCFunction)stringio_write,    METH_O,       stringio_write_doc},
     
-    {"seekable",   (PyCFunction)stringio_seekable, METH_NOARGS},
-    {"readable",   (PyCFunction)stringio_readable, METH_NOARGS},
-    {"writable",   (PyCFunction)stringio_writable, METH_NOARGS},
+    {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
+    {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
+    {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
     {NULL, NULL}        /* sentinel */
 };
 
 static PyGetSetDef stringio_getset[] = {
+    {"closed",         (getter)stringio_closed,         NULL, NULL},
+    {"newlines",       (getter)stringio_newlines,       NULL, NULL},
     /*  (following comments straight off of the original Python wrapper:)
         XXX Cruft to support the TextIOWrapper API. This would only
         be meaningful if StringIO supported the buffer attribute.
@@ -558,7 +707,7 @@
 
 PyTypeObject PyStringIO_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    "_StringIO",                               /*tp_name*/
+    "StringIO",                                /*tp_name*/
     sizeof(StringIOObject),                    /*tp_basicsize*/
     0,                                         /*tp_itemsize*/
     (destructor)stringio_dealloc,              /*tp_dealloc*/
@@ -577,13 +726,13 @@
     0,                                         /*tp_setattro*/
     0,                                         /*tp_as_buffer*/
     Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE,  /*tp_flags*/
-    0,                                         /*tp_doc*/
+    stringio_doc,                              /*tp_doc*/
     0,                                         /*tp_traverse*/
     0,                                         /*tp_clear*/
     0,                                         /*tp_richcompare*/
     0,                                         /*tp_weaklistoffset*/
     0,                                         /*tp_iter*/
-    0,                                         /*tp_iternext*/
+    (iternextfunc)stringio_iternext,           /*tp_iternext*/
     stringio_methods,                          /*tp_methods*/
     0,                                         /*tp_members*/
     stringio_getset,                           /*tp_getset*/

Modified: python/branches/io-c/Modules/io.c
==============================================================================
--- python/branches/io-c/Modules/io.c	(original)
+++ python/branches/io-c/Modules/io.c	Sat Feb 21 19:54:01 2009
@@ -671,8 +671,8 @@
     ADD_TYPE(&PyBytesIO_Type, "BytesIO");
 
     /* StringIO */
-    /* PyStringIO_Type.tp_base = &PyTextIOBase_Type; */
-    ADD_TYPE(&PyStringIO_Type, "_StringIO");
+    PyStringIO_Type.tp_base = &PyTextIOBase_Type;
+    ADD_TYPE(&PyStringIO_Type, "StringIO");
 
     /* BufferedReader */
     PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type;


More information about the Python-checkins mailing list