[Python-checkins] r68518 - sandbox/trunk/io-c/_textio.c
antoine.pitrou
python-checkins at python.org
Sun Jan 11 02:38:50 CET 2009
Author: antoine.pitrou
Date: Sun Jan 11 02:38:50 2009
New Revision: 68518
Log:
Text IO changes:
- readline() twice as fast
- introduce a CHECK_CLOSED macro
- introduce a CHECK_INITIALIZED macro
Modified:
sandbox/trunk/io-c/_textio.c
Modified: sandbox/trunk/io-c/_textio.c
==============================================================================
--- sandbox/trunk/io-c/_textio.c (original)
+++ sandbox/trunk/io-c/_textio.c Sun Jan 11 02:38:50 2009
@@ -450,6 +450,7 @@
typedef struct
{
PyObject_HEAD
+ int ok; /* initialized? */
Py_ssize_t chunk_size;
PyObject *buffer;
PyObject *encoding;
@@ -492,6 +493,7 @@
PyObject *res;
int r;
+ self->ok = 0;
if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|zzzi:fileio",
kwlist, &buffer, &encoding, &errors,
&newline, &line_buffering))
@@ -609,6 +611,7 @@
self->seekable = self->telling = PyObject_IsTrue(res);
Py_DECREF(res);
+ self->ok = 1;
return 0;
error:
@@ -620,7 +623,7 @@
{
PyObject *res;
/* XXX this is inelegant */
- if (Py_TYPE(self)->tp_del == NULL) {
+ if (Py_TYPE(self)->tp_del == NULL && self->ok) {
/* We need to resurrect the object as calling close() can invoke
arbitrary code. */
((PyObject *) self)->ob_refcnt++;
@@ -635,6 +638,7 @@
if (--((PyObject *) self)->ob_refcnt != 0)
return;
}
+ self->ok = 0;
Py_CLEAR(self->buffer);
Py_CLEAR(self->encoding);
Py_CLEAR(self->encoder);
@@ -648,6 +652,42 @@
Py_TYPE(self)->tp_free((PyObject *)self);
}
+static PyObject *
+TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context);
+
+/* Make the calling function fail (return NULL with an exception set) when
+ the wrapper is closed.
+
+ For exact PyTextIOWrapper_Type instances the C-level `closed` getter is
+ called directly and ValueError is set here if it evaluates true; any
+ other type is delegated to _PyIOBase_checkClosed().
+
+ This macro takes some shortcuts to make the common case faster. We could
+ specialize even more, by detecting that the underlying buffer is a
+ BufferedObject.
+
+ NOTE: every failure path is a bare `return NULL`, so no cleanup label of
+ the calling function is run. Use it only before references that would
+ need releasing have been acquired. */
+#define CHECK_CLOSED(self) \
+ do { \
+ int r; \
+ PyObject *_res; \
+ if (Py_TYPE(self) == &PyTextIOWrapper_Type) { \
+ _res = TextIOWrapper_closed_get(self, NULL); \
+ if (_res == NULL) \
+ return NULL; \
+ r = PyObject_IsTrue(_res); \
+ Py_DECREF(_res); \
+ if (r < 0) \
+ return NULL; \
+ if (r > 0) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on closed file."); \
+ return NULL; \
+ } \
+ } \
+ else if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL) \
+ return NULL; \
+ } while (0)
+
+/* Fail with ValueError (returning NULL from the calling function) unless
+ the object's `ok` flag is set, i.e. unless initialization ran to
+ completion and the object has not been torn down since.
+ Wrapped in do { } while (0) so that `CHECK_INITIALIZED(self);` is exactly
+ one statement and cannot capture a following `else`. */
+#define CHECK_INITIALIZED(self) \
+ do { \
+ if ((self)->ok <= 0) { \
+ PyErr_SetString(PyExc_ValueError, \
+ "I/O operation on uninitialized object"); \
+ return NULL; \
+ } \
+ } while (0)
+
+
Py_LOCAL_INLINE(const Py_UNICODE *)
findchar(const Py_UNICODE *s, Py_ssize_t size, Py_UNICODE ch)
{
@@ -670,12 +710,13 @@
int haslf = 0;
int needflush = 0;
+ CHECK_INITIALIZED(self);
+
if (!PyArg_ParseTuple(args, "U:write", &text)) {
return NULL;
}
- if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL)
- return NULL;
+ CHECK_CLOSED(self);
Py_INCREF(text);
@@ -873,15 +914,20 @@
}
+
static PyObject *
TextIOWrapper_read(PyTextIOWrapperObject *self, PyObject *args)
{
Py_ssize_t n = -1;
PyObject *result;
+ CHECK_INITIALIZED(self);
+
if (!PyArg_ParseTuple(args, "|n:read", &n))
return NULL;
+ CHECK_CLOSED(self);
+
if (n < 0) {
/* Read everything */
PyObject *bytes = PyObject_CallMethod(self->buffer, "read", NULL);
@@ -935,9 +981,10 @@
return NULL;
}
-/* It is assumed that end points to the real end of the Py_UNICODE storage,
- that is to the NUL character. Otherwise the function will produce incorrect
- results. */
+
+/* NOTE: `end` must point to the real end of the Py_UNICODE storage,
+ that is to the NUL character. Otherwise the function will produce
+ incorrect results. */
static Py_UNICODE *
find_control_char(Py_UNICODE *start, Py_UNICODE *end, Py_UNICODE ch)
{
@@ -953,151 +1000,236 @@
}
}
-static PyObject *
-_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
+/* Finds the first line ending between start and end.
+ If not found, returns -1 and sets (*consumed) to the number of characters
+ which can be safely put aside before another search.
+ If found, returns the index after the line ending and doesn't touch
+ (*consumed).
+
+ NOTE: `end` must point to the real end of the Py_UNICODE storage,
+ that is to the NUL character. Otherwise the function will produce
+ incorrect results. */
+static Py_ssize_t
+find_line_ending(PyTextIOWrapperObject *self,
+ Py_UNICODE *start, Py_UNICODE *end, Py_ssize_t *consumed)
{
- PyObject *line;
- Py_ssize_t start, endpos;
- int res;
-
- if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL)
- return NULL;
+ Py_ssize_t len = end - start;
- /* Grab all the decoded text (we will rewind any extra bits later). */
- line = TextIOWrapper_get_decoded_chars(self, -1);
- if (line == NULL)
- return NULL;
-
- start = 0;
-
- endpos = -1;
-
- while (1) {
- Py_UNICODE* ptr = PyUnicode_AS_UNICODE(line);
- if (self->readtranslate) {
- /* Newlines are already translated, only search for \n */
- Py_UNICODE *pos = find_control_char(ptr + start,
- ptr + PyUnicode_GET_SIZE(line),
- '\n');
- if (pos != NULL) {
- endpos = pos - ptr + 1;
- break;
- }
- else
- start = PyUnicode_GET_SIZE(line);
+ if (self->readtranslate) {
+ /* Newlines are already translated, only search for \n */
+ Py_UNICODE *pos = find_control_char(start, end, '\n');
+ if (pos != NULL)
+ return pos - start + 1;
+ else {
+ *consumed = len;
+ return -1;
}
- else if (self->readuniversal) {
- /* Universal newline search. Find any of \r, \r\n, \n
- * The decoder ensures that \r\n are not split in two pieces
- */
- Py_UNICODE *s = ptr + start;
- Py_UNICODE *e = ptr + PyUnicode_GET_SIZE(line);
- for (;;) {
- Py_UNICODE ch;
- /* Fast path for non-control chars. The loop always ends
- since the Py_UNICODE storage is NUL-terminated. */
- while (*s > '\r')
- s++;
- if (s == e)
- goto _universal_not_found;
- ch = *s++;
- if (ch == '\n') {
- endpos = s - ptr;
- break;
- }
- if (ch == '\r') {
- if (*s == '\n')
- endpos = s - ptr + 1;
- else
- endpos = s - ptr;
- break;
- }
+ }
+ else if (self->readuniversal) {
+ /* Universal newline search. Find any of \r, \r\n, \n
+ * The decoder ensures that \r\n are not split in two pieces
+ */
+ Py_UNICODE *s = start;
+ for (;;) {
+ Py_UNICODE ch;
+ /* Fast path for non-control chars. The loop always ends
+ since the Py_UNICODE storage is NUL-terminated. */
+ while (*s > '\r')
+ s++;
+ if (s >= end) {
+ *consumed = len;
+ return -1;
}
- break;
- _universal_not_found:
- start = PyUnicode_GET_SIZE(line);
+ ch = *s++;
+ if (ch == '\n')
+ return s - start;
+ if (ch == '\r') {
+ if (*s == '\n')
+ return s - start + 1;
+ else
+ return s - start;
+ }
+ }
+ }
+ else {
+ /* Non-universal mode. */
+ Py_ssize_t readnl_len = PyUnicode_GET_SIZE(self->readnl);
+ Py_UNICODE *nl = PyUnicode_AS_UNICODE(self->readnl);
+ if (readnl_len == 1) {
+ Py_UNICODE *pos = find_control_char(start, end, nl[0]);
+ if (pos != NULL)
+ return pos - start + 1;
+ *consumed = len;
+ return -1;
}
else {
- /* Non-universal mode. */
- Py_ssize_t readnl_len = PyUnicode_GET_SIZE(self->readnl);
- Py_ssize_t line_len = PyUnicode_GET_SIZE(line);
- if (readnl_len <= line_len) {
- if (readnl_len == 1) {
- Py_UNICODE *pos = find_control_char(
- ptr + start,
- ptr + line_len,
- PyUnicode_AS_UNICODE(self->readnl)[0]);
- if (pos != NULL) {
- endpos = pos - ptr + 1;
- break;
- }
- start = PyUnicode_GET_SIZE(line);
- }
- else {
- Py_ssize_t pos = PyUnicode_Find(line, self->readnl,
- start, line_len, 1);
- if (pos >= 0) {
- endpos = pos + readnl_len;
+ Py_UNICODE *s = start;
+ Py_UNICODE *e = end - readnl_len + 1;
+ Py_UNICODE *pos;
+ if (e < s)
+ e = s;
+ while (s < e) {
+ Py_ssize_t i;
+ Py_UNICODE *pos = find_control_char(s, end, nl[0]);
+ if (pos == NULL || pos >= e)
+ break;
+ for (i = 1; i < readnl_len; i++) {
+ if (pos[i] != nl[i])
break;
- }
- start = line_len - readnl_len + 1;
}
+ if (i == readnl_len)
+ return pos - start + readnl_len;
+ s = pos + 1;
}
+ pos = find_control_char(e, end, nl[0]);
+ if (pos == NULL)
+ *consumed = len;
+ else
+ *consumed = pos - start;
+ return -1;
}
+ }
+}
- if (limit >= 0 && PyUnicode_GET_SIZE(line) >= limit) {
- /* reached length limit */
- endpos = limit;
- break;
- }
+static PyObject *
+_TextIOWrapper_readline(PyTextIOWrapperObject *self, Py_ssize_t limit)
+{
+ PyObject *line = NULL, *chunks = NULL, *remaining = NULL;
+ Py_ssize_t start, endpos, chunked, offset_to_buffer;
+ int res;
+
+ CHECK_CLOSED(self);
+
+ chunked = 0;
- /* No line ending seen yet - get more data */
- while (1) {
+ while (1) {
+ Py_UNICODE *ptr;
+ Py_ssize_t line_len;
+ Py_ssize_t consumed = 0;
+
+ /* First, get some data if necessary */
+ res = 1;
+ while (!self->decoded_chars ||
+ !PyUnicode_GET_SIZE(self->decoded_chars)) {
res = TextIOWrapper_read_chunk(self);
if (res < 0)
goto error;
if (res == 0)
break;
- if (self->decoded_chars &&
- PyUnicode_GET_SIZE(self->decoded_chars))
- break;
}
if (res == 0) {
/* end of file */
TextIOWrapper_set_decoded_chars(self, NULL);
Py_CLEAR(self->snapshot);
- return line;
+ start = endpos = offset_to_buffer = 0;
+ break;
+ }
+
+ if (remaining == NULL) {
+ line = self->decoded_chars;
+ start = self->decoded_chars_used;
+ offset_to_buffer = 0;
+ Py_INCREF(line);
}
else {
- PyUnicode_AppendAndDel(&line,
- TextIOWrapper_get_decoded_chars(
- self, -1));
+ assert(self->decoded_chars_used == 0);
+ line = PyUnicode_Concat(remaining, self->decoded_chars);
+ start = 0;
+ offset_to_buffer = PyUnicode_GET_SIZE(remaining);
+ Py_CLEAR(remaining);
if (line == NULL)
goto error;
}
- }
- if (limit >= 0 && endpos > limit)
- endpos = limit; /* don't exceed limit */
+ ptr = PyUnicode_AS_UNICODE(line);
+ line_len = PyUnicode_GET_SIZE(line);
- /* Rewind decoded_chars to just after the line ending we found. */
- TextIOWrapper_rewind_decoded_chars(
- self, PyUnicode_GET_SIZE(line) - endpos);
-
- if (PyUnicode_GET_SIZE(line) != endpos) {
- PyObject *resized = PyUnicode_FromUnicode(
- PyUnicode_AS_UNICODE(line), endpos);
+ endpos = find_line_ending(self, ptr + start, ptr + line_len, &consumed);
+ if (endpos >= 0) {
+ endpos += start;
+ if (limit >= 0 && (endpos - start) + chunked >= limit)
+ endpos = start + limit - chunked;
+ break;
+ }
- if (resized == NULL)
- goto error;
+ /* We can put aside up to `endpos` */
+ endpos = consumed + start;
+ if (limit >= 0 && (endpos - start) + chunked >= limit) {
+ /* Didn't find line ending, but reached length limit */
+ endpos = start + limit - chunked;
+ break;
+ }
- Py_DECREF(line);
- line = resized;
+ if (endpos > start) {
+ /* No line ending seen yet - put aside current data */
+ PyObject *s;
+ if (chunks == NULL) {
+ chunks = PyList_New(0);
+ if (chunks == NULL)
+ goto error;
+ }
+ s = PyUnicode_FromUnicode(ptr + start, endpos - start);
+ if (s == NULL)
+ goto error;
+ if (PyList_Append(chunks, s) < 0) {
+ Py_DECREF(s);
+ goto error;
+ }
+ chunked += PyUnicode_GET_SIZE(s);
+ Py_DECREF(s);
+ }
+ /* There may be some remaining characters we'll have to prepend to the
+ next chunk of data */
+ if (endpos < line_len) {
+ remaining = PyUnicode_FromUnicode(
+ ptr + endpos, line_len - endpos);
+ if (remaining == NULL)
+ goto error;
+ }
+ Py_CLEAR(line);
+ /* We have consumed the buffer */
+ TextIOWrapper_set_decoded_chars(self, NULL);
+ }
+
+ if (line != NULL) {
+ /* Our line ends in the current buffer */
+ self->decoded_chars_used = endpos - offset_to_buffer;
+ if (start > 0 || endpos < PyUnicode_GET_SIZE(line)) {
+ if (start == 0 && Py_REFCNT(line) == 1) {
+ if (PyUnicode_Resize(&line, endpos) < 0)
+ goto error;
+ }
+ else {
+ PyObject *s = PyUnicode_FromUnicode(
+ PyUnicode_AS_UNICODE(line) + start, endpos - start);
+ Py_CLEAR(line);
+ if (s == NULL)
+ goto error;
+ line = s;
+ }
+ }
+ }
+ if (remaining != NULL) {
+ /* `remaining` is only still set when the loop ended at end of file:
+ text held back as a possible partial line ending is part of the
+ result, even if nothing else had been put aside yet. */
+ if (chunks == NULL) {
+ chunks = PyList_New(0);
+ if (chunks == NULL)
+ goto error;
+ }
+ if (PyList_Append(chunks, remaining) < 0)
+ goto error;
+ Py_CLEAR(remaining);
+ }
+ if (chunks != NULL) {
+ PyObject *sep;
+ if (line != NULL && PyList_Append(chunks, line) < 0)
+ goto error;
+ Py_CLEAR(line);
+ /* PyUnicode_Join() does not steal the separator: hold our own
+ reference so it can be checked and released afterwards. */
+ sep = PyUnicode_FromStringAndSize(NULL, 0);
+ if (sep == NULL)
+ goto error;
+ line = PyUnicode_Join(sep, chunks);
+ Py_DECREF(sep);
+ if (line == NULL)
+ goto error;
+ Py_CLEAR(chunks);
+ if (line == NULL)
+ line = PyUnicode_FromStringAndSize(NULL, 0);
+
return line;
error:
- Py_DECREF(line);
+ Py_XDECREF(chunks);
+ Py_XDECREF(remaining);
+ Py_XDECREF(line);
return NULL;
}
@@ -1106,6 +1238,7 @@
{
Py_ssize_t limit = -1;
+ CHECK_INITIALIZED(self);
if (!PyArg_ParseTuple(args, "|n:readline", &limit)) {
return NULL;
}
@@ -1234,6 +1367,8 @@
PyObject *res;
int cmp;
+ CHECK_INITIALIZED(self);
+
if (zero == NULL) {
zero = PyLong_FromLong(0L);
if (zero == NULL)
@@ -1244,8 +1379,7 @@
return NULL;
+ /* CHECK_CLOSED() returns NULL directly on failure, so it must run
+ before the reference to cookieObj is taken; otherwise the early
+ return would leak that reference. */
+ CHECK_CLOSED(self);
Py_INCREF(cookieObj);
- if (_PyIOBase_checkClosed((PyObject *)self, Py_True) == NULL)
- goto fail;
if (!self->seekable) {
PyErr_SetString(PyExc_IOError,
@@ -1404,6 +1538,9 @@
PyObject *saved_state = NULL;
char *input, *input_end;
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
+
if (!self->seekable) {
PyErr_SetString(PyExc_IOError,
"underlying stream is not seekable");
@@ -1561,36 +1698,43 @@
static PyObject *
TextIOWrapper_fileno(PyTextIOWrapperObject *self, PyObject *args)
{
+ CHECK_INITIALIZED(self);
return PyObject_CallMethod(self->buffer, "fileno", NULL);
}
static PyObject *
TextIOWrapper_seekable(PyTextIOWrapperObject *self, PyObject *args)
{
+ CHECK_INITIALIZED(self);
return PyObject_CallMethod(self->buffer, "seekable", NULL);
}
static PyObject *
TextIOWrapper_readable(PyTextIOWrapperObject *self, PyObject *args)
{
+ CHECK_INITIALIZED(self);
return PyObject_CallMethod(self->buffer, "readable", NULL);
}
static PyObject *
TextIOWrapper_writable(PyTextIOWrapperObject *self, PyObject *args)
{
+ CHECK_INITIALIZED(self);
return PyObject_CallMethod(self->buffer, "writable", NULL);
}
static PyObject *
TextIOWrapper_isatty(PyTextIOWrapperObject *self, PyObject *args)
{
+ CHECK_INITIALIZED(self);
return PyObject_CallMethod(self->buffer, "isatty", NULL);
}
static PyObject *
TextIOWrapper_flush(PyTextIOWrapperObject *self, PyObject *args)
{
+ CHECK_INITIALIZED(self);
+ CHECK_CLOSED(self);
self->telling = self->seekable;
return PyObject_CallMethod(self->buffer, "flush", NULL);
}
@@ -1598,7 +1742,9 @@
static PyObject *
TextIOWrapper_close(PyTextIOWrapperObject *self, PyObject *args)
{
- PyObject *res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
+ PyObject *res;
+ CHECK_INITIALIZED(self);
+ res = PyObject_CallMethod((PyObject *)self, "flush", NULL);
if (res == NULL) {
/* If flush() fails, just give up */
PyErr_Clear();
@@ -1614,8 +1760,9 @@
{
PyObject *line;
- self->telling = 0;
+ CHECK_INITIALIZED(self);
+ self->telling = 0;
if (Py_TYPE(self) == &PyTextIOWrapper_Type) {
/* Skip method call overhead for speed */
line = _TextIOWrapper_readline(self, -1);
@@ -1645,12 +1792,14 @@
static PyObject *
TextIOWrapper_name_get(PyTextIOWrapperObject *self, void *context)
{
+ CHECK_INITIALIZED(self);
return PyObject_GetAttrString(self->buffer, "name");
}
static PyObject *
TextIOWrapper_closed_get(PyTextIOWrapperObject *self, void *context)
{
+ CHECK_INITIALIZED(self);
return PyObject_GetAttr(self->buffer, _PyIO_str_closed);
}
@@ -1658,6 +1807,7 @@
TextIOWrapper_newlines_get(PyTextIOWrapperObject *self, void *context)
{
PyObject *res;
+ CHECK_INITIALIZED(self);
if (self->decoder == NULL)
Py_RETURN_NONE;
res = PyObject_GetAttr(self->decoder, _PyIO_str_newlines);
More information about the Python-checkins
mailing list