[Python-checkins] CVS: python/dist/src/Objects fileobject.c,2.105,2.106
Tim Peters
python-dev@python.org
Mon, 15 Jan 2001 02:36:59 -0800
Update of /cvsroot/python/python/dist/src/Objects
In directory usw-pr-cvs1:/tmp/cvs-serv28099/python/dist/src/objects
Modified Files:
fileobject.c
Log Message:
Speed up getline_via_fgets() by supplying two "fast paths" (one of which is
faster than the other). Should be faster for Mark Favas's 254-character
mail log lines, and *is* 3-4% quicker for my test case with much shorter
lines (but they're typical of *my* text files, and I'm tired of optimizing
for everyone else at my expense <wink> -- in fact, the only one who loses
here is Guido ...).
Index: fileobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/fileobject.c,v
retrieving revision 2.105
retrieving revision 2.106
diff -C2 -r2.105 -r2.106
*** fileobject.c 2001/01/15 06:33:19 2.105
--- fileobject.c 2001/01/15 10:36:56 2.106
***************
*** 689,763 ****
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
! * no-realloc path. get_line uses 100 for its initial size, but isn't trying
! * to avoid reallocs. Under MSVC 6, and using files with lines all under 100
! * chars long, dropping this from 200 to 100 bought less than 1% speedup.
! * Since many kinds of log files have lines exceeding 100 chars, the tiny
! * slowdown from using 200 is more than offset by the large speedup for such
! * log files.
! * INCBUFSIZE is the amount by which we grow the buffer, if INITBUFSIZE isn't
! * enough. It doesn't much matter what this set to.
*/
! #define INITBUFSIZE 200
#define INCBUFSIZE 1000
PyObject* v; /* the string object result */
- size_t total_v_size; /* total # chars in v's buffer */
char* pvfree; /* address of next free slot */
char* pvend; /* address one beyond last free slot */
! char* p; /* temp */
! char buf[INITBUFSIZE];
/* Optimize for normal case: avoid _PyString_Resize if at all
! * possible via first reading into auto buf.
*/
! Py_BEGIN_ALLOW_THREADS
! memset(buf, '\n', INITBUFSIZE);
! p = fgets(buf, INITBUFSIZE, fp);
! Py_END_ALLOW_THREADS
! if (p == NULL) {
! clearerr(fp);
! if (PyErr_CheckSignals())
! return NULL;
! v = PyString_FromStringAndSize("", 0);
! return v;
! }
! /* fgets read *something* */
! p = memchr(buf, '\n', INITBUFSIZE);
! if (p != NULL) {
! /* Did the \n come from fgets or from us?
! * Since fgets stops at the first \n, and then writes \0, if
! * it's from fgets a \0 must be next. But if that's so, it
! * could not have come from us, since the \n's we filled the
! * buffer with have only more \n's to the right.
! */
! pvend = buf + INITBUFSIZE;
! if (p+1 < pvend && *(p+1) == '\0') {
! /* It's from fgets: we win! In particular, we
! * haven't done any mallocs yet, and can build the
! * final result on the first try.
*/
! v = PyString_FromStringAndSize(buf, p - buf + 1);
return v;
}
! /* Must be from us: fgets didn't fill the buffer and didn't
! * find a newline, so it must be the last and newline-free
! * line of the file.
*/
! assert(p > buf && *(p-1) == '\0');
! v = PyString_FromStringAndSize(buf, p - buf - 1);
! return v;
}
! /* yuck: fgets overwrote all the newlines, i.e. the entire buffer.
! * So this line isn't over yet, or maybe it is but we're exactly at
! * EOF; in either case, we're tired <wink>.
*/
! assert(buf[INITBUFSIZE-1] == '\0');
! total_v_size = INITBUFSIZE + INCBUFSIZE;
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
if (v == NULL)
return v;
/* copy over everything except the last null byte */
! memcpy(BUF(v), buf, INITBUFSIZE-1);
! pvfree = BUF(v) + INITBUFSIZE - 1;
/* Keep reading stuff into v; if it ever ends successfully, break
--- 689,791 ----
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
! * no-realloc, one-fgets()-call path. Boosting it isn't free, because we have
! * to fill this much of the buffer with a known value in order to figure out
! * how much of the buffer fgets() overwrites. So if INITBUFSIZE is larger
! * than "most" lines, we waste time filling unused buffer slots. 100 is
! * surely adequate for most people's email archives, chewing over source code,
! * etc -- "regular old text files".
! * MAXBUFSIZE is the maximum line length that lets us get away with the less
! * fast (but still zippy) no-realloc, two-fgets()-call path. See above for
! * cautions about boosting that. 300 was chosen because the worst real-life
! * text-crunching job reported on Python-Dev was a mail-log crawler where over
! * half the lines were 254 chars.
! * INCBUFSIZE is the amount by which we grow the buffer, if MAXBUFSIZE isn't
! * enough. It doesn't much matter what this is set to: we only get here for
! * absurdly long lines anyway.
*/
! #define INITBUFSIZE 100
! #define MAXBUFSIZE 300
#define INCBUFSIZE 1000
+ char* p; /* temp */
+ char buf[MAXBUFSIZE];
PyObject* v; /* the string object result */
char* pvfree; /* address of next free slot */
char* pvend; /* address one beyond last free slot */
! size_t nfree; /* # of free buffer slots; pvend-pvfree */
! size_t total_v_size; /* total # of slots in buffer */
/* Optimize for normal case: avoid _PyString_Resize if at all
! * possible via first reading into stack buffer "buf".
*/
! total_v_size = INITBUFSIZE; /* start small and pray */
! pvfree = buf;
! for (;;) {
! Py_BEGIN_ALLOW_THREADS
! pvend = buf + total_v_size;
! nfree = pvend - pvfree;
! memset(pvfree, '\n', nfree);
! p = fgets(pvfree, nfree, fp);
! Py_END_ALLOW_THREADS
! if (p == NULL) {
! clearerr(fp);
! if (PyErr_CheckSignals())
! return NULL;
! v = PyString_FromStringAndSize(buf, pvfree - buf);
! return v;
! }
! /* fgets read *something* */
! p = memchr(pvfree, '\n', nfree);
! if (p != NULL) {
! /* Did the \n come from fgets or from us?
! * Since fgets stops at the first \n, and then writes
! * \0, if it's from fgets a \0 must be next. But if
! * that's so, it could not have come from us, since
! * the \n's we filled the buffer with have only more
! * \n's to the right.
*/
! if (p+1 < pvend && *(p+1) == '\0') {
! /* It's from fgets: we win! In particular,
! * we haven't done any mallocs yet, and can
! * build the final result on the first try.
! */
! ++p; /* include \n from fgets */
! }
! else {
! /* Must be from us: fgets didn't fill the
! * buffer and didn't find a newline, so it
! * must be the last and newline-free line of
! * the file.
! */
! assert(p > pvfree && *(p-1) == '\0');
! --p; /* don't include \0 from fgets */
! }
! v = PyString_FromStringAndSize(buf, p - buf);
return v;
}
! /* yuck: fgets overwrote all the newlines, i.e. the entire
! * buffer. So this line isn't over yet, or maybe it is but
! * we're exactly at EOF. If we haven't already, try using the
! * rest of the stack buffer.
*/
! assert(*(pvend-1) == '\0');
! if (pvfree == buf) {
! pvfree = pvend - 1; /* overwrite trailing null */
! total_v_size = MAXBUFSIZE;
! }
! else
! break;
}
!
! /* The stack buffer isn't big enough; malloc a string object and read
! * into its buffer.
*/
! total_v_size = MAXBUFSIZE + INCBUFSIZE;
v = PyString_FromStringAndSize((char*)NULL, (int)total_v_size);
if (v == NULL)
return v;
/* copy over everything except the last null byte */
! memcpy(BUF(v), buf, MAXBUFSIZE-1);
! pvfree = BUF(v) + MAXBUFSIZE - 1;
/* Keep reading stuff into v; if it ever ends successfully, break
***************
*** 767,772 ****
*/
for (;;) {
- size_t nfree;
-
Py_BEGIN_ALLOW_THREADS
pvend = BUF(v) + total_v_size;
--- 795,798 ----
***************
*** 815,818 ****
--- 841,845 ----
return v;
#undef INITBUFSIZE
+ #undef MAXBUFSIZE
#undef INCBUFSIZE
}