[Python-checkins] CVS: python/dist/src/Objects fileobject.c,2.99,2.100

Tim Peters python-dev@python.org
Sun, 07 Jan 2001 16:53:15 -0800


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv9879/python/dist/src/objects

Modified Files:
	fileobject.c 
Log Message:
Fiddled ms_getline_hack after talking w/ Guido:  made clearer that the
code duplication is to let us get away without a realloc whenever possible;
boosted the init buf size (the cutoff at which we *can* get away without
a realloc) from 100 to 200 so that more files can enjoy this boost; and
allowed other threads to run in all cases.  The last two cost something,
but not significantly:  in my fat test case, less than a 1% slowdown total.
Since my test case has a great many short lines, that's probably the worst
slowdown, too.  While the logic barely changed, there were lots of edits.
This also gets rid of the reference to fp->_cnt, so the last platform
assumption being made here is that fgets doesn't overwrite bytes
capriciously (== beyond the terminating null byte it must write).


Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.99
retrieving revision 2.100
diff -C2 -r2.99 -r2.100
*** fileobject.c	2001/01/07 21:19:34	2.99
--- fileobject.c	2001/01/08 00:53:12	2.100
***************
*** 655,661 ****
  stdio buffer, and we optimize heavily for that case.
  
! CAUTION:  This routine cheats, relying on how MSVC 6 works internally.
! They seem to be relatively safe cheats, but we should expect this code
! to break someday.
  **************************************************************************/
  
--- 655,662 ----
  stdio buffer, and we optimize heavily for that case.
  
! CAUTION:  This routine cheats, relying on the fact that MSVC 6 fgets doesn't
! overwrite any buffer positions to the right of the terminating null byte.
! Seems unlikely that will change in the future, but ... std test test_bufio
! should catch it if that changes.
  **************************************************************************/
  
***************
*** 669,673 ****
  ms_getline_hack(FILE *fp)
  {
! #define INITBUFSIZE 100
  #define INCBUFSIZE 1000
  	PyObject* v;	/* the string object result */
--- 670,684 ----
  ms_getline_hack(FILE *fp)
  {
! /* INITBUFSIZE is the maximum line length that lets us get away with the fast
!  * no-realloc path.  get_line uses 100 for its initial size, but isn't trying
!  * to avoid reallocs.  Under MSVC 6, and using files with lines all under 100
!  * chars long, dropping this from 200 to 100 bought less than 1% speedup.
!  * Since many kinds of log files have lines exceeding 100 chars, the tiny
!  * slowdown from using 200 is more than offset by the large speedup for such
!  * log files.
!  * INCBUFSIZE is the amount by which we grow the buffer, if INITBUFSIZE isn't
!  * enough.  It doesn't much matter what this is set to.
!  */
! #define INITBUFSIZE 200
  #define INCBUFSIZE 1000
  	PyObject* v;	/* the string object result */
***************
*** 676,748 ****
  	char* pvend;    /* address one beyond last free slot */
  	char* p;	/* temp */
  
! 	if (fp->_cnt > 0) { /* HACK: "_cnt" isn't advertised */
! 		/* optimize for normal case:  something sitting in the
! 		 * buffer ready to go; avoid thread fiddling & realloc
! 		 * if possible
! 		 */
! 		char msbuf[INITBUFSIZE];
! 		memset(msbuf, '\n', INITBUFSIZE);
! 		p = fgets(msbuf, INITBUFSIZE, fp);
! 		/* since we didn't lock the file, there's no guarantee
! 		 * anything was still in the buffer
  		 */
! 		if (p == NULL) {
! 			clearerr(fp);
! 			if (PyErr_CheckSignals())
! 				return NULL;
! 			v = PyString_FromStringAndSize("", 0);
! 			return v;
! 		}
! 		/* fgets read *something* */
! 		p = memchr(msbuf, '\n', INITBUFSIZE);
! 		if (p != NULL) {
! 			/* Did the \n come from fgets or from us?
! 			 * Since fgets stops at the first \n, and then
! 			 * writes \0, if it's from fgets a \0 must be next.
! 			 * But if that's so, it could not have come from us,
! 			 * since the \n's we filled the buffer with have only
! 			 * more \n's to the right.
! 			 */
! 			pvend = msbuf + INITBUFSIZE;
! 			if (p+1 < pvend && *(p+1) == '\0') {
! 				/* it's from fgets:  we win! */
! 				v = PyString_FromStringAndSize(msbuf,
! 					p - msbuf + 1);
! 				return v;
! 			}
! 			/* Must be from us:  fgets didn't fill the buffer
! 			 * and didn't find a newline, so it must be the
! 			 * last and newline-free line of the file.
  			 */
! 			assert(p > msbuf && *(p-1) == '\0');
! 			v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
  			return v;
  		}
! 		/* yuck:  fgets overwrote all the newlines, i.e. the entire
! 		 * buffer.  So this line isn't over yet, or maybe it is but
! 		 * we're exactly at EOF; in either case, we're tired <wink>.
  		 */
! 		assert(msbuf[INITBUFSIZE-1] == '\0');
! 		total_v_size = INITBUFSIZE + INCBUFSIZE;
! 		v = PyString_FromStringAndSize((char*)NULL,
! 			(int)total_v_size);
! 		if (v == NULL)
! 			return v;
! 		/* copy over everything except the last null byte */
! 		memcpy(BUF(v), msbuf, INITBUFSIZE-1);
! 		pvfree = BUF(v) + INITBUFSIZE - 1;
  	}
! 	else {
! 		/* The stream isn't ready or isn't buffered. */
! 		v = PyString_FromStringAndSize((char*)NULL, INITBUFSIZE);
! 		if (v == NULL)
! 			return v;
! 		total_v_size = INITBUFSIZE;
! 		pvfree = BUF(v);
! 	}
  
  	/* Keep reading stuff into v; if it ever ends successfully, break
! 	 * after setting p one beyond the end of the line.
  	 */
  	for (;;) {
--- 687,751 ----
  	char* pvend;    /* address one beyond last free slot */
  	char* p;	/* temp */
+ 	char msbuf[INITBUFSIZE];
  
! 	/* Optimize for normal case:  avoid _PyString_Resize if at all
! 	 * possible via first reading into auto msbuf.
! 	 */
! 	Py_BEGIN_ALLOW_THREADS
! 	memset(msbuf, '\n', INITBUFSIZE);
! 	p = fgets(msbuf, INITBUFSIZE, fp);
! 	Py_END_ALLOW_THREADS
! 
! 	if (p == NULL) {
! 		clearerr(fp);
! 		if (PyErr_CheckSignals())
! 			return NULL;
! 		v = PyString_FromStringAndSize("", 0);
! 		return v;
! 	}
! 	/* fgets read *something* */
! 	p = memchr(msbuf, '\n', INITBUFSIZE);
! 	if (p != NULL) {
! 		/* Did the \n come from fgets or from us?
! 		 * Since fgets stops at the first \n, and then writes \0, if
! 		 * it's from fgets a \0 must be next.  But if that's so, it
! 		 * could not have come from us, since the \n's we filled the
! 		 * buffer with have only more \n's to the right.
  		 */
! 		pvend = msbuf + INITBUFSIZE;
! 		if (p+1 < pvend && *(p+1) == '\0') {
! 			/* It's from fgets:  we win!  In particular, we
! 			 * haven't done any mallocs yet, and can build the
! 			 * final result on the first try.
  			 */
! 			v = PyString_FromStringAndSize(msbuf, p - msbuf + 1);
  			return v;
  		}
! 		/* Must be from us:  fgets didn't fill the buffer and didn't
! 		 * find a newline, so it must be the last and newline-free
! 		 * line of the file.
  		 */
! 		assert(p > msbuf && *(p-1) == '\0');
! 		v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
! 		return v;
  	}
! 	/* yuck:  fgets overwrote all the newlines, i.e. the entire buffer.
! 	 * So this line isn't over yet, or maybe it is but we're exactly at
! 	 * EOF; in either case, we're tired <wink>.
! 	 */
! 	assert(msbuf[INITBUFSIZE-1] == '\0');
! 	total_v_size = INITBUFSIZE + INCBUFSIZE;
! 	v = PyString_FromStringAndSize((char*)NULL,
! 		(int)total_v_size);
! 	if (v == NULL)
! 		return v;
! 	/* copy over everything except the last null byte */
! 	memcpy(BUF(v), msbuf, INITBUFSIZE-1);
! 	pvfree = BUF(v) + INITBUFSIZE - 1;
  
  	/* Keep reading stuff into v; if it ever ends successfully, break
! 	 * after setting p one beyond the end of the line.  The code here is
! 	 * very much like the code above, except reads into v's buffer; see
! 	 * the code above for detailed comments about the logic.
  	 */
  	for (;;) {
***************
*** 765,769 ****
  			break;
  		}
- 		/* See the "normal case" comments above for details. */
  		p = memchr(pvfree, '\n', nfree);
  		if (p != NULL) {
--- 768,771 ----