[Python-checkins] cpython: Issue #8745: Small speedup of zipimport on Windows. Patch by Catalin Iacob.

serhiy.storchaka python-checkins at python.org
Sat Feb 16 16:44:35 CET 2013


http://hg.python.org/cpython/rev/088a14031998
changeset:   82225:088a14031998
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Feb 16 17:43:45 2013 +0200
summary:
  Issue #8745: Small speedup of zipimport on Windows. Patch by Catalin Iacob.

files:
  Lib/test/test_zipimport.py |   2 +
  Misc/NEWS                  |   2 +
  Modules/zipimport.c        |  27 +++++++++++++++++--------
  3 files changed, 22 insertions(+), 9 deletions(-)


diff --git a/Lib/test/test_zipimport.py b/Lib/test/test_zipimport.py
--- a/Lib/test/test_zipimport.py
+++ b/Lib/test/test_zipimport.py
@@ -196,6 +196,7 @@
             for name, (mtime, data) in files.items():
                 zinfo = ZipInfo(name, time.localtime(mtime))
                 zinfo.compress_type = self.compression
+                zinfo.comment = b"spam"
                 z.writestr(zinfo, data)
             z.close()
 
@@ -245,6 +246,7 @@
             for name, (mtime, data) in files.items():
                 zinfo = ZipInfo(name, time.localtime(mtime))
                 zinfo.compress_type = self.compression
+                zinfo.comment = b"eggs"
                 z.writestr(zinfo, data)
             z.close()
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,8 @@
 Core and Builtins
 -----------------
 
+- Issue #8745: Small speed up zipimport on Windows. Patch by Catalin Iacob.
+
 - Issue #5308: Raise ValueError when marshalling too large object (a sequence
   with size >= 2**31), instead of producing illegal marshal data.
 
diff --git a/Modules/zipimport.c b/Modules/zipimport.c
--- a/Modules/zipimport.c
+++ b/Modules/zipimport.c
@@ -862,6 +862,7 @@
     long l, count;
     Py_ssize_t i;
     char name[MAXPATHLEN + 5];
+    char dummy[8]; /* Buffer to read unused header values into */
     PyObject *nameobj = NULL;
     char *p, endof_central_dir[22];
     Py_ssize_t arc_offset;  /* Absolute offset to start of the zip-archive. */
@@ -905,17 +906,23 @@
 
     /* Start of Central Directory */
     count = 0;
+    if (fseek(fp, header_offset, 0) == -1)
+        goto file_error;
     for (;;) {
         PyObject *t;
         int err;
 
-        if (fseek(fp, header_offset, 0) == -1)  /* Start of file header */
-            goto fseek_error;
+        /* Start of file header */
         l = PyMarshal_ReadLongFromFile(fp);
         if (l != 0x02014B50)
             break;              /* Bad: Central Dir File Header */
-        if (fseek(fp, header_offset + 8, 0) == -1)
-            goto fseek_error;
+
+        /* On Windows, calling fseek to skip over the fields we don't use is
+        slower than reading the data into a dummy buffer because fseek flushes
+        stdio's internal buffers. See issue #8745. */
+        if (fread(dummy, 1, 4, fp) != 4) /* Skip unused fields, avoid fseek */
+            goto file_error;
+
         flags = (unsigned short)PyMarshal_ReadShortFromFile(fp);
         compress = PyMarshal_ReadShortFromFile(fp);
         time = PyMarshal_ReadShortFromFile(fp);
@@ -924,11 +931,11 @@
         data_size = PyMarshal_ReadLongFromFile(fp);
         file_size = PyMarshal_ReadLongFromFile(fp);
         name_size = PyMarshal_ReadShortFromFile(fp);
-        header_size = 46 + name_size +
+        header_size = name_size +
            PyMarshal_ReadShortFromFile(fp) +
            PyMarshal_ReadShortFromFile(fp);
-        if (fseek(fp, header_offset + 42, 0) == -1)
-            goto fseek_error;
+        if (fread(dummy, 1, 8, fp) != 8) /* Skip unused fields, avoid fseek */
+            goto file_error;
         file_offset = PyMarshal_ReadLongFromFile(fp) + arc_offset;
         if (name_size > MAXPATHLEN)
             name_size = MAXPATHLEN;
@@ -941,7 +948,9 @@
             p++;
         }
         *p = 0;         /* Add terminating null byte */
-        header_offset += header_size;
+        for (; i < header_size; i++) /* Skip the rest of the header */
+            if(getc(fp) == EOF) /* Avoid fseek */
+                goto file_error;
 
         bootstrap = 0;
         if (flags & 0x0800)
@@ -988,7 +997,7 @@
         PySys_FormatStderr("# zipimport: found %ld names in %R\n",
                            count, archive);
     return files;
-fseek_error:
+file_error:
     fclose(fp);
     Py_XDECREF(files);
     Py_XDECREF(nameobj);

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list