[Python-checkins] cpython: Issue #23344: marshal.dumps() is now 20-25% faster on average.

serhiy.storchaka python-checkins at python.org
Wed Feb 11 14:56:46 CET 2015


https://hg.python.org/cpython/rev/bb05f845e7dc
changeset:   94587:bb05f845e7dc
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Wed Feb 11 15:54:54 2015 +0200
summary:
  Issue #23344: marshal.dumps() is now 20-25% faster on average.

files:
  Doc/whatsnew/3.5.rst |   5 +-
  Misc/NEWS            |   2 +
  Python/marshal.c     |  83 +++++++++++++++++++++++--------
  3 files changed, 67 insertions(+), 23 deletions(-)


diff --git a/Doc/whatsnew/3.5.rst b/Doc/whatsnew/3.5.rst
--- a/Doc/whatsnew/3.5.rst
+++ b/Doc/whatsnew/3.5.rst
@@ -381,8 +381,9 @@
 * Many operations on :class:`io.BytesIO` are now 50% to 100% faster.
   (Contributed by Serhiy Storchaka in :issue:`15381`.)
 
-* :func:`marshal.dumps` with versions 3 and 4 is now 40-50% faster on average.
-  (Contributed by Serhiy Storchaka in :issue:`20416`.)
+* :func:`marshal.dumps` is now faster (65%-85% with versions 3--4, 20-25% with
+  versions 0--2 on typical data, and up to 5x in best cases).
+  (Contributed by Serhiy Storchaka in :issue:`20416` and :issue:`23344`.)
 
 
 Build and C API Changes
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -13,6 +13,8 @@
 Library
 -------
 
+- Issue #23344: marshal.dumps() is now 20-25% faster on average.
+
 - Issue #20416: marshal.dumps() with protocols 3 and 4 is now 40-50% faster on
   average.
 
diff --git a/Python/marshal.c b/Python/marshal.c
--- a/Python/marshal.c
+++ b/Python/marshal.c
@@ -78,42 +78,75 @@
     int version;
 } WFILE;
 
-#define w_byte(c, p) if (((p)->fp)) putc((c), (p)->fp); \
-                      else if ((p)->ptr != (p)->end) *(p)->ptr++ = (c); \
-                           else w_more((c), p)
+#define w_byte(c, p) do {                               \
+        if ((p)->ptr != (p)->end || w_reserve((p), 1))  \
+            *(p)->ptr++ = (c);                          \
+    } while(0)
 
 static void
-w_more(char c, WFILE *p)
+w_flush(WFILE *p)
 {
-    Py_ssize_t size, newsize;
-    if (p->str == NULL)
-        return; /* An error already occurred */
+    assert(p->fp != NULL);
+    fwrite(p->buf, 1, p->ptr - p->buf, p->fp);
+    p->ptr = p->buf;
+}
+
+static int
+w_reserve(WFILE *p, Py_ssize_t needed)
+{
+    Py_ssize_t pos, size, delta;
+    if (p->ptr == NULL)
+        return 0; /* An error already occurred */
+    if (p->fp != NULL) {
+        w_flush(p);
+        return needed <= p->end - p->ptr;
+    }
+    assert(p->str != NULL);
+    pos = p->ptr - p->buf;
     size = PyBytes_Size(p->str);
-    newsize = size + size + 1024;
-    if (newsize > 32*1024*1024) {
-        newsize = size + (size >> 3);           /* 12.5% overallocation */
+    if (size > 16*1024*1024)
+        delta = (size >> 3);            /* 12.5% overallocation */
+    else
+        delta = size + 1024;
+    delta = Py_MAX(delta, needed);
+    if (delta > PY_SSIZE_T_MAX - size) {
+        p->error = WFERR_NOMEMORY;
+        return 0;
     }
-    if (_PyBytes_Resize(&p->str, newsize) != 0) {
-        p->ptr = p->end = NULL;
+    size += delta;
+    if (_PyBytes_Resize(&p->str, size) != 0) {
+        p->ptr = p->buf = p->end = NULL;
+        return 0;
     }
     else {
-        p->ptr = PyBytes_AS_STRING((PyBytesObject *)p->str) + size;
-        p->end =
-            PyBytes_AS_STRING((PyBytesObject *)p->str) + newsize;
-        *p->ptr++ = c;
+        p->buf = PyBytes_AS_STRING(p->str);
+        p->ptr = p->buf + pos;
+        p->end = p->buf + size;
+        return 1;
     }
 }
 
 static void
 w_string(const char *s, Py_ssize_t n, WFILE *p)
 {
+    Py_ssize_t m;
+    if (!n || p->ptr == NULL)
+        return;
+    m = p->end - p->ptr;
     if (p->fp != NULL) {
-        fwrite(s, 1, n, p->fp);
+        if (n <= m) {
+            Py_MEMCPY(p->ptr, s, n);
+            p->ptr += n;
+        }
+        else {
+            w_flush(p);
+            fwrite(s, 1, n, p->fp);
+        }
     }
     else {
-        while (--n >= 0) {
-            w_byte(*s, p);
-            s++;
+        if (n <= m || w_reserve(p, n - m)) {
+            Py_MEMCPY(p->ptr, s, n);
+            p->ptr += n;
         }
     }
 }
@@ -573,26 +606,34 @@
 void
 PyMarshal_WriteLongToFile(long x, FILE *fp, int version)
 {
+    char buf[4];
     WFILE wf;
     memset(&wf, 0, sizeof(wf));
     wf.fp = fp;
+    wf.ptr = wf.buf = buf;
+    wf.end = wf.ptr + sizeof(buf);
     wf.error = WFERR_OK;
     wf.version = version;
     w_long(x, &wf);
+    w_flush(&wf);
 }
 
 void
 PyMarshal_WriteObjectToFile(PyObject *x, FILE *fp, int version)
 {
+    char buf[BUFSIZ];
     WFILE wf;
     memset(&wf, 0, sizeof(wf));
     wf.fp = fp;
+    wf.ptr = wf.buf = buf;
+    wf.end = wf.ptr + sizeof(buf);
     wf.error = WFERR_OK;
     wf.version = version;
     if (w_init_refs(&wf, version))
         return; /* caller must check PyErr_Occurred() */
     w_object(x, &wf);
     w_clear_refs(&wf);
+    w_flush(&wf);
 }
 
 typedef WFILE RFILE; /* Same struct with different invariants */
@@ -1533,7 +1574,7 @@
     wf.str = PyBytes_FromStringAndSize((char *)NULL, 50);
     if (wf.str == NULL)
         return NULL;
-    wf.ptr = PyBytes_AS_STRING((PyBytesObject *)wf.str);
+    wf.ptr = wf.buf = PyBytes_AS_STRING((PyBytesObject *)wf.str);
     wf.end = wf.ptr + PyBytes_Size(wf.str);
     wf.error = WFERR_OK;
     wf.version = version;

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list