[Python-checkins] CVS: python/dist/src/Objects fileobject.c,2.99,2.100
Tim Peters
python-dev@python.org
Sun, 07 Jan 2001 16:53:15 -0800
Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv9879/python/dist/src/objects
Modified Files:
fileobject.c
Log Message:
Fiddled ms_getline_hack after talking w/ Guido: made clearer that the
code duplication is to let us get away without a realloc whenever possible;
boosted the init buf size (the cutoff at which we *can* get away without
a realloc) from 100 to 200 so that more files can enjoy this boost; and
allowed other threads to run in all cases. The last two cost something,
but not significantly: in my fat test case, less than a 1% slowdown total.
Since my test case has a great many short lines, that's probably the worst
slowdown, too. While the logic barely changed, there were lots of edits.
This also gets rid of the reference to fp->_cnt, so the last platform
assumption being made here is that fgets doesn't overwrite bytes
capriciously (== beyond the terminating null byte it must write).
Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.99
retrieving revision 2.100
diff -C2 -r2.99 -r2.100
*** fileobject.c 2001/01/07 21:19:34 2.99
--- fileobject.c 2001/01/08 00:53:12 2.100
***************
*** 655,661 ****
stdio buffer, and we optimize heavily for that case.
! CAUTION: This routine cheats, relying on how MSVC 6 works internally.
! They seem to be relatively safe cheats, but we should expect this code
! to break someday.
**************************************************************************/
--- 655,662 ----
stdio buffer, and we optimize heavily for that case.
! CAUTION: This routine cheats, relying on the fact that MSVC 6 fgets doesn't overwrite
! any buffer positions to the right of the terminating null byte. Seems
! unlikely that will change in the future, but ... std test test_bufio should
! catch it if that changes.
**************************************************************************/
***************
*** 669,673 ****
ms_getline_hack(FILE *fp)
{
! #define INITBUFSIZE 100
#define INCBUFSIZE 1000
PyObject* v; /* the string object result */
--- 670,684 ----
ms_getline_hack(FILE *fp)
{
! /* INITBUFSIZE is the maximum line length that lets us get away with the fast
! * no-realloc path. get_line uses 100 for its initial size, but isn't trying
! * to avoid reallocs. Under MSVC 6, and using files with lines all under 100
! * chars long, dropping this from 200 to 100 bought less than 1% speedup.
! * Since many kinds of log files have lines exceeding 100 chars, the tiny
! * slowdown from using 200 is more than offset by the large speedup for such
! * log files.
! * INCBUFSIZE is the amount by which we grow the buffer, if INITBUFSIZE isn't
! * enough. It doesn't much matter what this is set to.
! */
! #define INITBUFSIZE 200
#define INCBUFSIZE 1000
PyObject* v; /* the string object result */
***************
*** 676,748 ****
char* pvend; /* address one beyond last free slot */
char* p; /* temp */
! if (fp->_cnt > 0) { /* HACK: "_cnt" isn't advertised */
! /* optimize for normal case: something sitting in the
! * buffer ready to go; avoid thread fiddling & realloc
! * if possible
! */
! char msbuf[INITBUFSIZE];
! memset(msbuf, '\n', INITBUFSIZE);
! p = fgets(msbuf, INITBUFSIZE, fp);
! /* since we didn't lock the file, there's no guarantee
! * anything was still in the buffer
*/
! if (p == NULL) {
! clearerr(fp);
! if (PyErr_CheckSignals())
! return NULL;
! v = PyString_FromStringAndSize("", 0);
! return v;
! }
! /* fgets read *something* */
! p = memchr(msbuf, '\n', INITBUFSIZE);
! if (p != NULL) {
! /* Did the \n come from fgets or from us?
! * Since fgets stops at the first \n, and then
! * writes \0, if it's from fgets a \0 must be next.
! * But if that's so, it could not have come from us,
! * since the \n's we filled the buffer with have only
! * more \n's to the right.
! */
! pvend = msbuf + INITBUFSIZE;
! if (p+1 < pvend && *(p+1) == '\0') {
! /* it's from fgets: we win! */
! v = PyString_FromStringAndSize(msbuf,
! p - msbuf + 1);
! return v;
! }
! /* Must be from us: fgets didn't fill the buffer
! * and didn't find a newline, so it must be the
! * last and newline-free line of the file.
*/
! assert(p > msbuf && *(p-1) == '\0');
! v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
return v;
}
! /* yuck: fgets overwrote all the newlines, i.e. the entire
! * buffer. So this line isn't over yet, or maybe it is but
! * we're exactly at EOF; in either case, we're tired <wink>.
*/
! assert(msbuf[INITBUFSIZE-1] == '\0');
! total_v_size = INITBUFSIZE + INCBUFSIZE;
! v = PyString_FromStringAndSize((char*)NULL,
! (int)total_v_size);
! if (v == NULL)
! return v;
! /* copy over everything except the last null byte */
! memcpy(BUF(v), msbuf, INITBUFSIZE-1);
! pvfree = BUF(v) + INITBUFSIZE - 1;
}
! else {
! /* The stream isn't ready or isn't buffered. */
! v = PyString_FromStringAndSize((char*)NULL, INITBUFSIZE);
! if (v == NULL)
! return v;
! total_v_size = INITBUFSIZE;
! pvfree = BUF(v);
! }
/* Keep reading stuff into v; if it ever ends successfully, break
! * after setting p one beyond the end of the line.
*/
for (;;) {
--- 687,751 ----
char* pvend; /* address one beyond last free slot */
char* p; /* temp */
+ char msbuf[INITBUFSIZE];
! /* Optimize for normal case: avoid _PyString_Resize if at all
! * possible via first reading into auto msbuf.
! */
! Py_BEGIN_ALLOW_THREADS
! memset(msbuf, '\n', INITBUFSIZE);
! p = fgets(msbuf, INITBUFSIZE, fp);
! Py_END_ALLOW_THREADS
!
! if (p == NULL) {
! clearerr(fp);
! if (PyErr_CheckSignals())
! return NULL;
! v = PyString_FromStringAndSize("", 0);
! return v;
! }
! /* fgets read *something* */
! p = memchr(msbuf, '\n', INITBUFSIZE);
! if (p != NULL) {
! /* Did the \n come from fgets or from us?
! * Since fgets stops at the first \n, and then writes \0, if
! * it's from fgets a \0 must be next. But if that's so, it
! * could not have come from us, since the \n's we filled the
! * buffer with have only more \n's to the right.
*/
! pvend = msbuf + INITBUFSIZE;
! if (p+1 < pvend && *(p+1) == '\0') {
! /* It's from fgets: we win! In particular, we
! * haven't done any mallocs yet, and can build the
! * final result on the first try.
*/
! v = PyString_FromStringAndSize(msbuf, p - msbuf + 1);
return v;
}
! /* Must be from us: fgets didn't fill the buffer and didn't
! * find a newline, so it must be the last and newline-free
! * line of the file.
*/
! assert(p > msbuf && *(p-1) == '\0');
! v = PyString_FromStringAndSize(msbuf, p - msbuf - 1);
! return v;
}
! /* yuck: fgets overwrote all the newlines, i.e. the entire buffer.
! * So this line isn't over yet, or maybe it is but we're exactly at
! * EOF; in either case, we're tired <wink>.
! */
! assert(msbuf[INITBUFSIZE-1] == '\0');
! total_v_size = INITBUFSIZE + INCBUFSIZE;
! v = PyString_FromStringAndSize((char*)NULL,
! (int)total_v_size);
! if (v == NULL)
! return v;
! /* copy over everything except the last null byte */
! memcpy(BUF(v), msbuf, INITBUFSIZE-1);
! pvfree = BUF(v) + INITBUFSIZE - 1;
/* Keep reading stuff into v; if it ever ends successfully, break
! * after setting p one beyond the end of the line. The code here is
! * very much like the code above, except reads into v's buffer; see
! * the code above for detailed comments about the logic.
*/
for (;;) {
***************
*** 765,769 ****
break;
}
- /* See the "normal case" comments above for details. */
p = memchr(pvfree, '\n', nfree);
if (p != NULL) {
--- 768,771 ----