[Python-checkins] CVS: python/dist/src/Objects fileobject.c,2.98,2.99

Tim Peters python-dev@python.org
Sun, 07 Jan 2001 13:19:36 -0800


Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv19540/python/dist/src/Objects

Modified Files:
	fileobject.c 
Log Message:
MS Win32 .readline() speedup, as discussed on Python-Dev.  This is a tricky
variant that never needs to "search from the right".
Also fixed unlikely memory leak in get_line, if string size overflows INT_MAX.
Also new std test test_bufio to make sure .readline() works.


Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.98
retrieving revision 2.99
diff -C2 -r2.98 -r2.99
*** fileobject.c	2001/01/07 20:51:39	2.98
--- fileobject.c	2001/01/07 21:19:34	2.99
***************
*** 245,249 ****
     return 0 on success, non-zero on failure (with errno set) */
  int
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
  _portable_fseek(FILE *fp, fpos_t offset, int whence)
  #else
--- 245,249 ----
     return 0 on success, non-zero on failure (with errno set) */
  int
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
  _portable_fseek(FILE *fp, fpos_t offset, int whence)
  #else
***************
*** 257,261 ****
  #elif defined(__BEOS__)
  	return _fseek(fp, offset, whence);
! #elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8 
  	/* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable
  		fsetpos() and tell() to implement fseek()*/
--- 257,261 ----
  #elif defined(__BEOS__)
  	return _fseek(fp, offset, whence);
! #elif defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_FPOS_T >= 8
  	/* lacking a 64-bit capable fseek() (as Win64 does) use a 64-bit capable
  		fsetpos() and tell() to implement fseek()*/
***************
*** 288,292 ****
     Return -1 on failure with errno set appropriately, current file
     position on success */
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
  fpos_t
  #else
--- 288,292 ----
     Return -1 on failure with errno set appropriately, current file
     position on success */
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
  fpos_t
  #else
***************
*** 315,319 ****
  	int whence;
  	int ret;
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
  	fpos_t offset, pos;
  #else
--- 315,319 ----
  	int whence;
  	int ret;
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
  	fpos_t offset, pos;
  #else
***************
*** 321,325 ****
  #endif /* !MS_WIN64 */
  	PyObject *offobj;
! 	
  	if (f->f_fp == NULL)
  		return err_closed();
--- 321,325 ----
  #endif /* !MS_WIN64 */
  	PyObject *offobj;
! 
  	if (f->f_fp == NULL)
  		return err_closed();
***************
*** 335,339 ****
  	if (PyErr_Occurred())
  		return NULL;
! 	
  	Py_BEGIN_ALLOW_THREADS
  	errno = 0;
--- 335,339 ----
  	if (PyErr_Occurred())
  		return NULL;
! 
  	Py_BEGIN_ALLOW_THREADS
  	errno = 0;
***************
*** 356,360 ****
  {
  	int ret;
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
  	fpos_t newsize;
  #else
--- 356,360 ----
  {
  	int ret;
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
  	fpos_t newsize;
  #else
***************
*** 362,366 ****
  #endif
  	PyObject *newsizeobj;
! 	
  	if (f->f_fp == NULL)
  		return err_closed();
--- 362,366 ----
  #endif
  	PyObject *newsizeobj;
! 
  	if (f->f_fp == NULL)
  		return err_closed();
***************
*** 417,421 ****
  	if (ret != 0) goto onioerror;
  #endif /* !MS_WIN32 */
! 	
  	Py_INCREF(Py_None);
  	return Py_None;
--- 417,421 ----
  	if (ret != 0) goto onioerror;
  #endif /* !MS_WIN32 */
! 
  	Py_INCREF(Py_None);
  	return Py_None;
***************
*** 431,435 ****
  file_tell(PyFileObject *f, PyObject *args)
  {
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8 
  	fpos_t pos;
  #else
--- 431,435 ----
  file_tell(PyFileObject *f, PyObject *args)
  {
! #if defined(HAVE_LARGEFILE_SUPPORT) && SIZEOF_OFF_T < 8 && SIZEOF_FPOS_T >= 8
  	fpos_t pos;
  #else
***************
*** 471,475 ****
  {
  	int res;
! 	
  	if (f->f_fp == NULL)
  		return err_closed();
--- 471,475 ----
  {
  	int res;
! 
  	if (f->f_fp == NULL)
  		return err_closed();
***************
*** 560,564 ****
  	size_t bytesread, buffersize, chunksize;
  	PyObject *v;
! 	
  	if (f->f_fp == NULL)
  		return err_closed();
--- 560,564 ----
  	size_t bytesread, buffersize, chunksize;
  	PyObject *v;
! 
  	if (f->f_fp == NULL)
  		return err_closed();
***************
*** 611,615 ****
  	char *ptr;
  	size_t ntodo, ndone, nnow;
! 	
  	if (f->f_fp == NULL)
  		return err_closed();
--- 611,615 ----
  	char *ptr;
  	size_t ntodo, ndone, nnow;
! 
  	if (f->f_fp == NULL)
  		return err_closed();
***************
*** 635,638 ****
--- 635,802 ----
  }
  
+ /**************************************************************************
+ Win32 MS routine to get next line.
+ 
+ Under MSVC 6:
+ 
+ + MS threadsafe getc is very slow (multiple layers of function calls
+   before+after each character, to lock+unlock the stream).
+ + The stream-locking functions are MS-internal -- can't access them
+   from user code.
+ + There's nothing Tim could find in the MS C or platform SDK libraries
+   that can worm around this.
+ + MS fgets locks/unlocks only once per line; it's the only hook we have.
+ 
+ So we use fgets for speed(!), despite that it's painful.
+ 
+ MS realloc is also slow.
+ 
+ In the usual case, we have one pleasantly small line already sitting in a
+ stdio buffer, and we optimize heavily for that case.
+ 
+ CAUTION:  This routine cheats, relying on how MSVC 6 works internally.
+ They seem to be relatively safe cheats, but we should expect this code
+ to break someday.
+ **************************************************************************/
+ 
+ /* if Win32 and MS's compiler */
+ #if defined(MS_WIN32) && defined(_MSC_VER)
+ #define USE_MS_GETLINE_HACK
+ #endif
+ 
+ #ifdef USE_MS_GETLINE_HACK
+ static PyObject*
+ ms_getline_hack(FILE *fp)	/* returns the next line as a string object, or NULL on failure */
+ {
+ #define INITBUFSIZE 100		/* size of the stack buffer and of the first allocation */
+ #define INCBUFSIZE 1000		/* growth step each time the line doesn't fit */
+ 	PyObject* v;	/* the string object being built and returned */
+ 	size_t total_v_size;  /* total # chars currently allocated in v's buffer */
+ 	char* pvfree;	/* address of the next unwritten slot in v's buffer */
+ 	char* pvend;    /* address one beyond the last slot of the active buffer */
+ 	char* p;	/* scratch: fgets/memchr result, later the end of the line */
+ 
+ 	if (fp->_cnt > 0) { /* HACK: "_cnt" is an unadvertised FILE field */
+ 		/* Optimize for the normal case:  something is sitting in
+ 		 * the stdio buffer ready to go, so read into a small stack
+ 		 * buffer and avoid thread fiddling & realloc if possible.
+ 		 */
+ 		char msbuf[INITBUFSIZE];
+ 		memset(msbuf, '\n', INITBUFSIZE);	/* sentinel fill, see below */
+ 		p = fgets(msbuf, INITBUFSIZE, fp);
+ 		/* We didn't lock the file, so the buffer may be empty by
+ 		 * now; NULL is normally reported as an empty string.
+ 		 */
+ 		if (p == NULL) {
+ 			clearerr(fp);
+ 			if (PyErr_CheckSignals())
+ 				return NULL;
+ 			v = PyString_FromStringAndSize("", 0);
+ 			return v;
+ 		}
+ 		/* fgets read *something*; find the first \n in the buffer */
+ 		p = memchr(msbuf, '\n', INITBUFSIZE);
+ 		if (p != NULL) {
+ 			/* Did this \n come from fgets or from our memset?
+ 			 * fgets stops at the first \n it reads and then
+ 			 * writes \0, so a \n from fgets is followed by \0.
+ 			 * A \n from our fill can't be followed by \0,
+ 			 * since the \n's we filled the buffer with have only
+ 			 * more \n's (or the end of the buffer) to the right.
+ 			 */
+ 			pvend = msbuf + INITBUFSIZE;
+ 			if (p+1 < pvend && *(p+1) == '\0') {
+ 				/* it's from fgets:  a complete line, \n included */
+ 				v = PyString_FromStringAndSize(msbuf,
+ 					p - msbuf + 1);
+ 				return v;
+ 			}
+ 			/* Must be from us:  fgets didn't fill the buffer
+ 			 * and didn't find a newline, so it must be the
+ 			 * last and newline-free line of the file.
+ 			 */
+ 			assert(p > msbuf && *(p-1) == '\0');
+ 			v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);	/* drop fgets's \0 */
+ 			return v;
+ 		}
+ 		/* yuck:  fgets overwrote all the newlines, i.e. the entire
+ 		 * buffer.  So this line isn't over yet, or maybe it is but
+ 		 * we're exactly at EOF; either way, continue in the loop below.
+ 		 */
+ 		assert(msbuf[INITBUFSIZE-1] == '\0');
+ 		total_v_size = INITBUFSIZE + INCBUFSIZE;
+ 		v = PyString_FromStringAndSize((char*)NULL,
+ 			(int)total_v_size);
+ 		if (v == NULL)
+ 			return v;
+ 		/* carry over what was read so far, minus the trailing null byte */
+ 		memcpy(BUF(v), msbuf, INITBUFSIZE-1);
+ 		pvfree = BUF(v) + INITBUFSIZE - 1;
+ 	}
+ 	else {
+ 		/* The stream isn't ready or isn't buffered:  start with an empty v. */
+ 		v = PyString_FromStringAndSize((char*)NULL, INITBUFSIZE);
+ 		if (v == NULL)
+ 			return v;
+ 		total_v_size = INITBUFSIZE;
+ 		pvfree = BUF(v);
+ 	}
+ 
+ 	/* Keep reading stuff into v's free tail; when the line ends (newline,
+ 	 * EOF or error), break with p set one beyond the last char to keep.
+ 	 */
+ 	for (;;) {
+ 		size_t nfree;	/* # bytes available starting at pvfree */
+ 
+ 		Py_BEGIN_ALLOW_THREADS
+ 		pvend = BUF(v) + total_v_size;
+ 		nfree = pvend - pvfree;
+ 		memset(pvfree, '\n', nfree);
+ 		p = fgets(pvfree, nfree, fp);
+ 		Py_END_ALLOW_THREADS
+ 
+ 		if (p == NULL) {
+ 			clearerr(fp);
+ 			if (PyErr_CheckSignals()) {
+ 				Py_DECREF(v);
+ 				return NULL;
+ 			}
+ 			p = pvfree;	/* nothing new was read; keep what we have */
+ 			break;
+ 		}
+ 		/* Same \n-sentinel test as in the "normal case" comments above. */
+ 		p = memchr(pvfree, '\n', nfree);
+ 		if (p != NULL) {
+ 			if (p+1 < pvend && *(p+1) == '\0') {
+ 				/* \n came from fgets; keep it in the result */
+ 				++p;
+ 				break;
+ 			}
+ 			/* \n came from us; last line of file, no newline; drop the \0 */
+ 			assert(p > pvfree && *(p-1) == '\0');
+ 			--p;
+ 			break;
+ 		}
+ 		/* no \n left:  fgets filled the free space; expand v and try again */
+ 		assert(*(pvend-1) == '\0');
+ 		total_v_size += INCBUFSIZE;
+ 		if (total_v_size > INT_MAX) {
+ 			PyErr_SetString(PyExc_OverflowError,
+ 			    "line is longer than a Python string can hold");
+ 			Py_DECREF(v);
+ 			return NULL;
+ 		}
+ 		if (_PyString_Resize(&v, (int)total_v_size) < 0)
+ 			return NULL;	/* presumably the failed resize already freed v -- confirm */
+ 		/* resume at the old end, overwriting the trailing null byte */
+ 		pvfree = BUF(v) + (total_v_size - INCBUFSIZE - 1);
+ 	}
+ 	if (BUF(v) + total_v_size != p)	/* trim the unused tail of v */
+ 		_PyString_Resize(&v, p - BUF(v));
+ 	return v;
+ #undef INITBUFSIZE
+ #undef INCBUFSIZE
+ }
+ #endif	/* ifdef USE_MS_GETLINE_HACK */
  
  /* Internal routine to get a line.
***************
*** 662,665 ****
--- 826,833 ----
  	PyObject *v;
  
+ #ifdef USE_MS_GETLINE_HACK
+ 	if (n == 0)
+ 		return ms_getline_hack(fp);
+ #endif
  	n2 = n > 0 ? n : 100;
  	v = PyString_FromStringAndSize((char *)NULL, n2);
***************
*** 696,699 ****
--- 864,868 ----
  			PyErr_SetString(PyExc_OverflowError,
  			    "line is longer than a Python string can hold");
+ 			Py_DECREF(v);
  			return NULL;
  		}
***************
*** 1000,1004 ****
  			    	const char *buffer;
  			    	int len;
! 				if (((f->f_binary && 
  				      PyObject_AsReadBuffer(v,
  					      (const void**)&buffer,
--- 1169,1173 ----
  			    	const char *buffer;
  			    	int len;
! 				if (((f->f_binary &&
  				      PyObject_AsReadBuffer(v,
  					      (const void**)&buffer,
***************
*** 1256,1260 ****
  		if (fno == NULL)
  			return -1;
! 		
  		if (PyInt_Check(fno)) {
  			fd = PyInt_AsLong(fno);
--- 1425,1429 ----
  		if (fno == NULL)
  			return -1;
! 
  		if (PyInt_Check(fno)) {
  			fd = PyInt_AsLong(fno);