[Python-checkins] python/dist/src/Objects fileobject.c,2.165,2.166

gvanrossum@users.sourceforge.net gvanrossum@users.sourceforge.net
Tue, 06 Aug 2002 08:55:31 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv27731/Objects

Modified Files:
	fileobject.c 
Log Message:
SF patch 580331 by Oren Tirosh: make file objects their own iterator.

For a file f, iter(f) now returns f (unless f is closed), and f.next()
is similar to f.readline() when EOF is not reached; however, f.next()
uses a readahead buffer that messes up the file position, so mixing
f.next() and f.readline() (or other methods) doesn't work right.
Calling f.seek() drops the readahead buffer, but other operations
don't.

The real purpose of this change is to reduce the confusion between
objects and their iterators.  By making a file its own iterator, it's
made clearer that using the iterator modifies the file object's state
(in particular the current position).

A nice side effect is that this speeds up "for line in f:" by not
having to use the xreadlines module.  The f.xreadlines() method is
still supported for backwards compatibility, though it is the same as
iter(f) now.

(I made some cosmetic changes to Oren's code, and added a test for
"file closed" to file_iternext() and file_iter().)



Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.165
retrieving revision 2.166
diff -C2 -d -r2.165 -r2.166
*** fileobject.c	14 Jul 2002 22:14:19 -0000	2.165
--- fileobject.c	6 Aug 2002 15:55:28 -0000	2.166
***************
*** 1,3 ****
- 
  /* File object implementation */
  
--- 1,2 ----
***************
*** 117,120 ****
--- 116,120 ----
  	f->f_softspace = 0;
  	f->f_binary = strchr(mode,'b') != NULL;
+ 	f->f_buf = NULL;
  #ifdef WITH_UNIVERSAL_NEWLINES
  	f->f_univ_newline = (strchr(mode, 'U') != NULL);
***************
*** 272,275 ****
--- 272,277 ----
  }
  
+ void drop_readahead(PyFileObject *);
+ 
  /* Methods */
  
***************
*** 284,287 ****
--- 286,290 ----
  	Py_XDECREF(f->f_name);
  	Py_XDECREF(f->f_mode);
+ 	drop_readahead(f);
  	f->ob_type->tp_free((PyObject *)f);
  }
***************
*** 406,409 ****
--- 409,413 ----
  	if (f->f_fp == NULL)
  		return err_closed();
+ 	drop_readahead(f);
  	whence = 0;
  	if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
***************
*** 1179,1204 ****
  
  static PyObject *
- file_xreadlines(PyFileObject *f)
- {
- 	static PyObject* xreadlines_function = NULL;
- 
- 	if (f->f_fp == NULL)
- 		return err_closed();
- 	if (!xreadlines_function) {
- 		PyObject *xreadlines_module =
- 			PyImport_ImportModule("xreadlines");
- 		if(!xreadlines_module)
- 			return NULL;
- 
- 		xreadlines_function = PyObject_GetAttrString(xreadlines_module,
- 							     "xreadlines");
- 		Py_DECREF(xreadlines_module);
- 		if(!xreadlines_function)
- 			return NULL;
- 	}
- 	return PyObject_CallFunction(xreadlines_function, "(O)", f);
- }
- 
- static PyObject *
  file_readlines(PyFileObject *f, PyObject *args)
  {
--- 1183,1186 ----
***************
*** 1463,1466 ****
--- 1445,1457 ----
  }
  
+ static PyObject *
+ file_getiter(PyFileObject *f)	/* iterator getter: a file object is its own iterator */
+ {
+ 	if (f->f_fp == NULL)	/* no open stream: hand back whatever err_closed() returns */
+ 		return err_closed();
+ 	Py_INCREF(f);	/* the caller receives its own reference to f */
+ 	return (PyObject *)f;
+ }
+ 
  PyDoc_STRVAR(readline_doc,
  "readline([size]) -> next line from the file, as a string.\n"
***************
*** 1518,1525 ****
  
  PyDoc_STRVAR(xreadlines_doc,
! "xreadlines() -> next line from the file, as a string.\n"
  "\n"
! "Equivalent to xreadlines.xreadlines(file).  This is like readline(), but\n"
! "often quicker, due to reading ahead internally.");
  
  PyDoc_STRVAR(writelines_doc,
--- 1509,1516 ----
  
  PyDoc_STRVAR(xreadlines_doc,
! "xreadlines() -> returns self.\n"
  "\n"
! "For backward compatibility. File objects now include the performance\n"
! "optimizations previously implemented in the xreadlines module.");
  
  PyDoc_STRVAR(writelines_doc,
***************
*** 1555,1559 ****
  	{"readinto",	(PyCFunction)file_readinto,   METH_VARARGS, readinto_doc},
  	{"readlines",	(PyCFunction)file_readlines,  METH_VARARGS, readlines_doc},
! 	{"xreadlines",	(PyCFunction)file_xreadlines, METH_NOARGS,  xreadlines_doc},
  	{"writelines",	(PyCFunction)file_writelines, METH_O,	    writelines_doc},
  	{"flush",	(PyCFunction)file_flush,      METH_NOARGS,  flush_doc},
--- 1546,1550 ----
  	{"readinto",	(PyCFunction)file_readinto,   METH_VARARGS, readinto_doc},
  	{"readlines",	(PyCFunction)file_readlines,  METH_VARARGS, readlines_doc},
! 	{"xreadlines",	(PyCFunction)file_getiter,    METH_NOARGS,  xreadlines_doc},
  	{"writelines",	(PyCFunction)file_writelines, METH_O,	    writelines_doc},
  	{"flush",	(PyCFunction)file_flush,      METH_NOARGS,  flush_doc},
***************
*** 1618,1627 ****
  };
  
  static PyObject *
! file_getiter(PyObject *f)
  {
! 	return PyObject_CallMethod(f, "xreadlines", "");
  }
  
  static PyObject *
  file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
--- 1609,1726 ----
  };
  
+ void	/* NOTE(review): defined without 'static', matching its earlier prototype */
+ drop_readahead(PyFileObject *f)	/* free f's readahead buffer, if there is one */
+ {
+ 	if (f->f_buf != NULL) {
+ 		PyMem_Free(f->f_buf);
+ 		f->f_buf = NULL;	/* NULL means "no readahead buffer allocated" */
+ 	}
+ }
+ 
+ /* Make sure that file has a readahead buffer with at least one byte
+    (unless at EOF) and no more than bufsize.  Returns 0 on success and a
+    negative value on error (allocation failure or stream read error). */
+ int readahead(PyFileObject *f, int bufsize) {
+ 	int chunksize;
+ 
+ 	if (f->f_buf != NULL) {
+ 		if( (f->f_bufend - f->f_bufptr) >= 1) 	/* unread bytes remain: nothing to do */
+ 			return 0;
+ 		else	/* buffer exhausted: free it, then refill below */
+ 			drop_readahead(f);
+ 	}
+ 	if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
+ 		return -1;	/* NOTE(review): no exception is set on this path -- confirm callers cope */
+ 	}
+ 	Py_BEGIN_ALLOW_THREADS
+ 	errno = 0;	/* cleared so PyErr_SetFromErrno() below reflects this read only */
+ 	chunksize = Py_UniversalNewlineFread(
+ 		f->f_buf, bufsize, f->f_fp, (PyObject *)f);
+ 	Py_END_ALLOW_THREADS
+ 	if (chunksize == 0) {	/* nothing was read: either EOF or a stream error */
+ 		if (ferror(f->f_fp)) {
+ 			PyErr_SetFromErrno(PyExc_IOError);
+ 			clearerr(f->f_fp);
+ 			drop_readahead(f);	/* leave no buffer behind on failure */
+ 			return -1;
+ 		}
+ 	}
+ 	f->f_bufptr = f->f_buf;	/* read cursor starts at the beginning of the buffer */
+ 	f->f_bufend = f->f_buf + chunksize;	/* end of the data just read */
+ 	return 0;
+ }
+ 
+ /* Used by file_iternext.  Returns a new string that starts with 'skip'
+    uninitialized bytes followed by the rest of the current line (only the
+    'skip' bytes at EOF), or NULL on error.  The recursion is shallow: each
+    level grows bufsize by 25%, so even a 1gb line needs a depth of ~50. */
+ 
+ PyStringObject *
+ readahead_get_line_skip(PyFileObject *f, int skip, int bufsize) {
+ 	PyStringObject* s;
+ 	char *bufptr;
+ 	char *buf;
+ 	int len;
+ 
+ 	if (f->f_buf == NULL)	/* no buffer yet: try to fill one of bufsize bytes */
+ 		if (readahead(f, bufsize) < 0) 
+ 			return NULL;
+ 
+ 	len = f->f_bufend - f->f_bufptr;	/* unread bytes left in the buffer */
+ 	if (len == 0) 	/* nothing left to read: result is just the 'skip' prefix */
+ 		return (PyStringObject *)
+ 			PyString_FromStringAndSize(NULL, skip);
+ 	bufptr = memchr(f->f_bufptr, '\n', len);
+ 	if (bufptr != NULL) {	/* the line ends inside this buffer */
+ 		bufptr++;			/* Count the '\n' */
+ 		len = bufptr - f->f_bufptr;
+ 		s = (PyStringObject *)
+ 			PyString_FromStringAndSize(NULL, skip+len);
+ 		if (s == NULL) 
+ 			return NULL;
+ 		memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
+ 		f->f_bufptr = bufptr;	/* advance past the line just copied */
+ 		if (bufptr == f->f_bufend)	/* buffer fully consumed: release it */
+ 			drop_readahead(f);
+ 	} else {	/* no newline found: the line continues past this buffer */
+ 		bufptr = f->f_bufptr;
+ 		buf = f->f_buf;	/* keep the old buffer alive until it is copied below */
+ 		f->f_buf = NULL; 	/* Force new readahead buffer */
+                 s = readahead_get_line_skip(
+ 			f, skip+len, bufsize + (bufsize>>2) );
+ 		if (s == NULL) {
+ 		        PyMem_Free(buf);
+ 			return NULL;
+ 		}
+ 		memcpy(PyString_AS_STRING(s)+skip, bufptr, len);	/* fill this level's slice */
+ 		PyMem_Free(buf);
+ 	}
+ 	return s;
+ }
+ 
+ /* A larger buffer size may actually decrease performance. */
+ #define READAHEAD_BUFSIZE 8192
+ 
  static PyObject *
! file_iternext(PyFileObject *f)	/* iterator step: return the next line of f as a string */
  {
! 	PyStringObject* l;
! 
! 	int i;	/* NOTE(review): assigned below but never read in this function */
! 
! 	if (f->f_fp == NULL)	/* no open stream: hand back whatever err_closed() returns */
! 		return err_closed();
! 
! 	i = f->f_softspace;
! 
! 	l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
! 	if (l == NULL || PyString_GET_SIZE(l) == 0) {	/* error, or empty string (nothing left) */
! 		Py_XDECREF(l);
! 		return NULL;	/* in the empty-string case no exception is set by this function */
! 	}
! 	return (PyObject *)l;
  }
  
+ 
  static PyObject *
  file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
***************
*** 1743,1748 ****
  	0,					/* tp_richcompare */
  	0,					/* tp_weaklistoffset */
! 	file_getiter,				/* tp_iter */
! 	0,					/* tp_iternext */
  	file_methods,				/* tp_methods */
  	file_memberlist,			/* tp_members */
--- 1842,1847 ----
  	0,					/* tp_richcompare */
  	0,					/* tp_weaklistoffset */
! 	(getiterfunc)file_getiter,		/* tp_iter */
! 	(iternextfunc)file_iternext,		/* tp_iternext */
  	file_methods,				/* tp_methods */
  	file_memberlist,			/* tp_members */