[Python-checkins] r69839 - in python/branches/io-c: Lib/io.py Lib/test/test_memoryio.py Modules/_stringio.c Modules/io.c
antoine.pitrou
python-checkins at python.org
Sat Feb 21 19:54:01 CET 2009
Author: antoine.pitrou
Date: Sat Feb 21 19:54:01 2009
New Revision: 69839
Log:
StringIO is now written entirely in C (and blazingly fast)
Modified:
python/branches/io-c/Lib/io.py
python/branches/io-c/Lib/test/test_memoryio.py
python/branches/io-c/Modules/_stringio.c
python/branches/io-c/Modules/io.c
Modified: python/branches/io-c/Lib/io.py
==============================================================================
--- python/branches/io-c/Lib/io.py (original)
+++ python/branches/io-c/Lib/io.py Sat Feb 21 19:54:01 2009
@@ -1847,6 +1847,7 @@
def newlines(self):
return self._decoder.newlines if self._decoder else None
+StringIO = _io.StringIO
class unused_StringIO(unused_TextIOWrapper):
"""Text I/O implementation using an in-memory buffer.
@@ -1871,101 +1872,6 @@
self.flush()
return self.buffer.getvalue().decode(self._encoding, self._errors)
-try:
- class StringIO(_io._StringIO, TextIOBase):
- """Text I/O implementation using an in-memory buffer.
-
- The initial_value argument sets the value of object. The newline
- argument is like the one of TextIOWrapper's constructor.
- """
-
- _read = _io._StringIO.read
- _readline = _io._StringIO.readline
- _write = _io._StringIO.write
- _tell = _io._StringIO.tell
- _seek = _io._StringIO.seek
- _truncate = _io._StringIO.truncate
- _getvalue = _io._StringIO.getvalue
-
- def getvalue(self) -> str:
- """Retrieve the entire contents of the object."""
- if self.closed:
- raise ValueError("read on closed file")
- return self._getvalue()
-
- def write(self, s: str) -> int:
- """Write string s to file.
-
- Returns the number of characters written.
- """
- if self.closed:
- raise ValueError("write to closed file")
- return self._write(s)
-
- def read(self, n: int = None) -> str:
- """Read at most n characters, returned as a string.
-
- If the argument is negative or omitted, read until EOF
- is reached. Return an empty string at EOF.
- """
- if self.closed:
- raise ValueError("read to closed file")
- return self._read(n)
-
- def tell(self) -> int:
- """Tell the current file position."""
- if self.closed:
- raise ValueError("tell from closed file")
- return self._tell()
-
- def seek(self, pos: int = None, whence: int = 0) -> int:
- """Change stream position.
-
- Seek to character offset pos relative to position indicated by whence:
- 0 Start of stream (the default). pos should be >= 0;
- 1 Current position - pos must be 0;
- 2 End of stream - pos must be 0.
- Returns the new absolute position.
- """
- if self.closed:
- raise ValueError("seek from closed file")
- return self._seek(pos, whence)
-
- def truncate(self, pos: int = None) -> int:
- """Truncate size to pos.
-
- The pos argument defaults to the current file position, as
- returned by tell(). Imply an absolute seek to pos.
- Returns the new absolute position.
- """
- if self.closed:
- raise ValueError("truncate from closed file")
- return self._truncate(pos)
-
- def readline(self, limit: int = None) -> str:
- if self.closed:
- raise ValueError("read from closed file")
- return self._readline(limit)
-
- _LF = 1
- _CR = 2
- _CRLF = 4
-
- @property
- def newlines(self):
- return (None,
- "\n",
- "\r",
- ("\r", "\n"),
- "\r\n",
- ("\n", "\r\n"),
- ("\r", "\r\n"),
- ("\r", "\n", "\r\n")
- )[self._seennl]
-
-
-except ImportError:
- StringIO = _StringIO
# make test_memoryio happy!
_BytesIO = BytesIO
Modified: python/branches/io-c/Lib/test/test_memoryio.py
==============================================================================
--- python/branches/io-c/Lib/test/test_memoryio.py (original)
+++ python/branches/io-c/Lib/test/test_memoryio.py Sat Feb 21 19:54:01 2009
@@ -391,7 +391,7 @@
self.assertEqual(memio.errors, "strict")
self.assertEqual(memio.line_buffering, False)
- def test_newlines_none(self):
+ def test_newline_none(self):
# newline=None
memio = self.ioclass("a\nb\r\nc\rd", newline=None)
self.assertEqual(list(memio), ["a\n", "b\n", "c\n", "d"])
@@ -407,7 +407,7 @@
memio.seek(0)
self.assertEqual(memio.read(), "a\nb\nc\nd")
- def test_newlines_empty(self):
+ def test_newline_empty(self):
# newline=""
memio = self.ioclass("a\nb\r\nc\rd", newline="")
self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"])
@@ -418,16 +418,17 @@
memio = self.ioclass(newline="")
self.assertEqual(2, memio.write("a\n"))
self.assertEqual(2, memio.write("b\r"))
- self.assertEqual(4, memio.write("\nc\rd"))
+ self.assertEqual(2, memio.write("\nc"))
+ self.assertEqual(2, memio.write("\rd"))
memio.seek(0)
self.assertEqual(list(memio), ["a\n", "b\r\n", "c\r", "d"])
- def test_newlines_lf(self):
+ def test_newline_lf(self):
# newline="\n"
memio = self.ioclass("a\nb\r\nc\rd")
self.assertEqual(list(memio), ["a\n", "b\r\n", "c\rd"])
- def test_newlines_cr(self):
+ def test_newline_cr(self):
# newline="\r"
memio = self.ioclass("a\nb\r\nc\rd", newline="\r")
memio.seek(0)
@@ -435,7 +436,7 @@
memio.seek(0)
self.assertEqual(list(memio), ["a\r", "b\r", "\r", "c\r", "d"])
- def test_newlines_crlf(self):
+ def test_newline_crlf(self):
# newline="\r\n"
memio = self.ioclass("a\nb\r\nc\rd", newline="\r\n")
memio.seek(0)
@@ -469,6 +470,17 @@
self.assertEqual(memio.tell(), len(buf) * 2)
self.assertEqual(memio.getvalue(), buf + buf)
+ # XXX This test fails with the Python version of io.StringIO
+ def test_newlines_property(self):
+ memio = self.ioclass(newline=None)
+ self.assertEqual(memio.newlines, None)
+ memio.write("a\n")
+ self.assertEqual(memio.newlines, "\n")
+ memio.write("b\r\n")
+ self.assertEqual(memio.newlines, ("\n", "\r\n"))
+ memio.write("c\rd")
+ self.assertEqual(memio.newlines, ("\r", "\n", "\r\n"))
+
def test_main():
tests = [PyBytesIOTest, PyStringIOTest, CBytesIOTest, CStringIOTest]
Modified: python/branches/io-c/Modules/_stringio.c
==============================================================================
--- python/branches/io-c/Modules/_stringio.c (original)
+++ python/branches/io-c/Modules/_stringio.c Sat Feb 21 19:54:01 2009
@@ -13,12 +13,13 @@
Py_ssize_t string_size;
size_t buf_size;
- int ok; /* initialized? */
+ char ok; /* initialized? */
+ char closed;
+ char readuniversal;
+ char readtranslate;
PyObject *decoder;
PyObject *readnl;
PyObject *writenl;
- char readuniversal;
- char readtranslate;
} StringIOObject;
#define CHECK_INITIALIZED(self) \
@@ -28,6 +29,20 @@
return NULL; \
}
+#define CHECK_CLOSED(self) \
+ if (self->closed) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on closed file"); \
+ return NULL; \
+ }
+
+PyDoc_STRVAR(stringio_doc,
+ "Text I/O implementation using an in-memory buffer.\n"
+ "\n"
+ "The initial_value argument sets the value of object. The newline\n"
+ "argument is like the one of TextIOWrapper's constructor.");
+
+
/* Internal routine for changing the size, in terms of characters, of the
buffer of StringIO objects. The caller should ensure that the 'size'
argument is non-negative. Returns 0 on success, -1 otherwise. */
@@ -163,20 +178,34 @@
return -1;
}
+PyDoc_STRVAR(stringio_getvalue_doc,
+ "Retrieve the entire contents of the object.");
+
static PyObject *
stringio_getvalue(StringIOObject *self)
{
CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyUnicode_FromUnicode(self->buf, self->string_size);
}
+PyDoc_STRVAR(stringio_tell_doc,
+ "Tell the current file position.");
+
static PyObject *
stringio_tell(StringIOObject *self)
{
CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyLong_FromSsize_t(self->pos);
}
+PyDoc_STRVAR(stringio_read_doc,
+ "Read at most n characters, returned as a string.\n"
+ "\n"
+ "If the argument is negative or omitted, read until EOF\n"
+ "is reached. Return an empty string at EOF.\n");
+
static PyObject *
stringio_read(StringIOObject *self, PyObject *args)
{
@@ -187,6 +216,7 @@
CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|O:read", &arg))
return NULL;
+ CHECK_CLOSED(self);
if (PyNumber_Check(arg)) {
size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
@@ -216,29 +246,13 @@
return PyUnicode_FromUnicode(output, size);
}
+/* Internal helper, used by stringio_readline and stringio_iternext */
static PyObject *
-stringio_readline(StringIOObject *self, PyObject *args)
+_stringio_readline(StringIOObject *self, Py_ssize_t limit)
{
- PyObject *arg = Py_None;
- Py_ssize_t limit = -1;
Py_UNICODE *start, *end, old_char;
Py_ssize_t len, consumed;
- CHECK_INITIALIZED(self);
- if (!PyArg_ParseTuple(args, "|O:readline", &arg))
- return NULL;
-
- if (PyNumber_Check(arg)) {
- limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
- if (limit == -1 && PyErr_Occurred())
- return NULL;
- }
- else if (arg != Py_None) {
- PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
- Py_TYPE(arg)->tp_name);
- return NULL;
- }
-
/* In case of overseek, return the empty string */
if (self->pos >= self->string_size)
return PyUnicode_FromString("");
@@ -262,6 +276,79 @@
return PyUnicode_FromUnicode(start, len);
}
+PyDoc_STRVAR(stringio_readline_doc,
+ "Read until newline or EOF.\n"
+ "\n"
+ "Returns an empty string if EOF is hit immediately.\n");
+
+static PyObject *
+stringio_readline(StringIOObject *self, PyObject *args)
+{
+ PyObject *arg = Py_None;
+ Py_ssize_t limit = -1;
+
+ CHECK_INITIALIZED(self);
+ if (!PyArg_ParseTuple(args, "|O:readline", &arg))
+ return NULL;
+ CHECK_CLOSED(self);
+
+ if (PyNumber_Check(arg)) {
+ limit = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
+ if (limit == -1 && PyErr_Occurred())
+ return NULL;
+ }
+ else if (arg != Py_None) {
+ PyErr_Format(PyExc_TypeError, "integer argument expected, got '%s'",
+ Py_TYPE(arg)->tp_name);
+ return NULL;
+ }
+ return _stringio_readline(self, limit);
+}
+
+static PyObject *
+stringio_iternext(StringIOObject *self)
+{
+ PyObject *line;
+
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+
+ if (Py_TYPE(self) == &PyStringIO_Type) {
+ /* Skip method call overhead for speed */
+ line = _stringio_readline(self, -1);
+ }
+ else {
+ /* XXX is subclassing StringIO really supported? */
+ line = PyObject_CallMethodObjArgs((PyObject *)self,
+ _PyIO_str_readline, NULL);
+ if (line && !PyUnicode_Check(line)) {
+ PyErr_Format(PyExc_IOError,
+ "readline() should have returned an str object, "
+ "not '%.200s'", Py_TYPE(line)->tp_name);
+ Py_DECREF(line);
+ return NULL;
+ }
+ }
+
+ if (line == NULL)
+ return NULL;
+
+ if (PyUnicode_GET_SIZE(line) == 0) {
+ /* Reached EOF */
+ Py_DECREF(line);
+ return NULL;
+ }
+
+ return line;
+}
+
+PyDoc_STRVAR(stringio_truncate_doc,
+ "Truncate size to pos.\n"
+ "\n"
+ "The pos argument defaults to the current file position, as\n"
+ "returned by tell(). Imply an absolute seek to pos.\n"
+ "Returns the new absolute position.\n");
+
static PyObject *
stringio_truncate(StringIOObject *self, PyObject *args)
{
@@ -271,6 +358,7 @@
CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|O:truncate", &arg))
return NULL;
+ CHECK_CLOSED(self);
if (PyNumber_Check(arg)) {
size = PyNumber_AsSsize_t(arg, PyExc_OverflowError);
@@ -303,6 +391,15 @@
return PyLong_FromSsize_t(size);
}
+PyDoc_STRVAR(stringio_seek_doc,
+ "Change stream position.\n"
+ "\n"
+ "Seek to character offset pos relative to position indicated by whence:\n"
+ " 0 Start of stream (the default). pos should be >= 0;\n"
+ " 1 Current position - pos must be 0;\n"
+ " 2 End of stream - pos must be 0.\n"
+ "Returns the new absolute position.\n");
+
static PyObject *
stringio_seek(StringIOObject *self, PyObject *args)
{
@@ -312,6 +409,7 @@
CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "n|i:seek", &pos, &mode))
return NULL;
+ CHECK_CLOSED(self);
if (mode != 0 && mode != 1 && mode != 2) {
PyErr_Format(PyExc_ValueError,
@@ -344,6 +442,12 @@
return PyLong_FromSsize_t(self->pos);
}
+PyDoc_STRVAR(stringio_write_doc,
+ "Write string to file.\n"
+ "\n"
+ "Returns the number of characters written, which is always equal to\n"
+ "the length of the string.\n");
+
static PyObject *
stringio_write(StringIOObject *self, PyObject *obj)
{
@@ -355,6 +459,7 @@
Py_TYPE(obj)->tp_name);
return NULL;
}
+ CHECK_CLOSED(self);
size = PyUnicode_GET_SIZE(obj);
if (size > 0 && write_str(self, obj) < 0)
@@ -363,13 +468,33 @@
return PyLong_FromSsize_t(size);
}
+PyDoc_STRVAR(stringio_close_doc,
+ "Close the IO object. Attempting any further operation after the\n"
+ "object is closed will raise a ValueError.\n"
+ "\n"
+ "This method has no effect if the file is already closed.\n");
+
+static PyObject *
+stringio_close(StringIOObject *self)
+{
+ self->closed = 1;
+ /* Free up some memory */
+ if (resize_buffer(self, 0) < 0)
+ return NULL;
+ Py_CLEAR(self->readnl);
+ Py_CLEAR(self->writenl);
+ Py_CLEAR(self->decoder);
+ Py_RETURN_NONE;
+}
+
static void
stringio_dealloc(StringIOObject *self)
{
Py_CLEAR(self->readnl);
Py_CLEAR(self->writenl);
Py_CLEAR(self->decoder);
- PyMem_Free(self->buf);
+ if (self->buf)
+ PyMem_Free(self->buf);
Py_TYPE(self)->tp_free(self);
}
@@ -472,11 +597,12 @@
}
self->pos = 0;
-
+ self->closed = 0;
self->ok = 1;
return 0;
}
+/* Properties and pseudo-properties */
static PyObject *
stringio_seekable(StringIOObject *self, PyObject *args)
{
@@ -507,9 +633,17 @@
}
static PyObject *
+stringio_closed(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ return PyBool_FromLong(self->closed);
+}
+
+static PyObject *
stringio_encoding(StringIOObject *self, void *context)
{
CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyUnicode_FromString("utf-8");
}
@@ -517,6 +651,7 @@
stringio_errors(StringIOObject *self, void *context)
{
CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
return PyUnicode_FromString("strict");
}
@@ -524,25 +659,39 @@
stringio_line_buffering(StringIOObject *self, void *context)
{
CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
Py_RETURN_FALSE;
}
+static PyObject *
+stringio_newlines(StringIOObject *self, void *context)
+{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+ if (self->decoder == NULL)
+ Py_RETURN_NONE;
+ return PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
+}
+
static struct PyMethodDef stringio_methods[] = {
- {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, NULL},
- {"read", (PyCFunction)stringio_read, METH_VARARGS, NULL},
- {"readline", (PyCFunction)stringio_readline, METH_VARARGS, NULL},
- {"tell", (PyCFunction)stringio_tell, METH_NOARGS, NULL},
- {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, NULL},
- {"seek", (PyCFunction)stringio_seek, METH_VARARGS, NULL},
- {"write", (PyCFunction)stringio_write, METH_O, NULL},
+ {"close", (PyCFunction)stringio_close, METH_NOARGS, stringio_close_doc},
+ {"getvalue", (PyCFunction)stringio_getvalue, METH_VARARGS, stringio_getvalue_doc},
+ {"read", (PyCFunction)stringio_read, METH_VARARGS, stringio_read_doc},
+ {"readline", (PyCFunction)stringio_readline, METH_VARARGS, stringio_readline_doc},
+ {"tell", (PyCFunction)stringio_tell, METH_NOARGS, stringio_tell_doc},
+ {"truncate", (PyCFunction)stringio_truncate, METH_VARARGS, stringio_truncate_doc},
+ {"seek", (PyCFunction)stringio_seek, METH_VARARGS, stringio_seek_doc},
+ {"write", (PyCFunction)stringio_write, METH_O, stringio_write_doc},
- {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
- {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
- {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
+ {"seekable", (PyCFunction)stringio_seekable, METH_NOARGS},
+ {"readable", (PyCFunction)stringio_readable, METH_NOARGS},
+ {"writable", (PyCFunction)stringio_writable, METH_NOARGS},
{NULL, NULL} /* sentinel */
};
static PyGetSetDef stringio_getset[] = {
+ {"closed", (getter)stringio_closed, NULL, NULL},
+ {"newlines", (getter)stringio_newlines, NULL, NULL},
/* (following comments straight off of the original Python wrapper:)
XXX Cruft to support the TextIOWrapper API. This would only
be meaningful if StringIO supported the buffer attribute.
@@ -558,7 +707,7 @@
PyTypeObject PyStringIO_Type = {
PyVarObject_HEAD_INIT(NULL, 0)
- "_StringIO", /*tp_name*/
+ "StringIO", /*tp_name*/
sizeof(StringIOObject), /*tp_basicsize*/
0, /*tp_itemsize*/
(destructor)stringio_dealloc, /*tp_dealloc*/
@@ -577,13 +726,13 @@
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
- 0, /*tp_doc*/
+ stringio_doc, /*tp_doc*/
0, /*tp_traverse*/
0, /*tp_clear*/
0, /*tp_richcompare*/
0, /*tp_weaklistoffset*/
0, /*tp_iter*/
- 0, /*tp_iternext*/
+ (iternextfunc)stringio_iternext, /*tp_iternext*/
stringio_methods, /*tp_methods*/
0, /*tp_members*/
stringio_getset, /*tp_getset*/
Modified: python/branches/io-c/Modules/io.c
==============================================================================
--- python/branches/io-c/Modules/io.c (original)
+++ python/branches/io-c/Modules/io.c Sat Feb 21 19:54:01 2009
@@ -671,8 +671,8 @@
ADD_TYPE(&PyBytesIO_Type, "BytesIO");
/* StringIO */
- /* PyStringIO_Type.tp_base = &PyTextIOBase_Type; */
- ADD_TYPE(&PyStringIO_Type, "_StringIO");
+ PyStringIO_Type.tp_base = &PyTextIOBase_Type;
+ ADD_TYPE(&PyStringIO_Type, "StringIO");
/* BufferedReader */
PyBufferedReader_Type.tp_base = &PyBufferedIOBase_Type;
More information about the Python-checkins mailing list