[Python-checkins] cpython: Issue #25349: Optimize bytes % int

victor.stinner python-checkins at python.org
Fri Oct 9 17:04:44 EDT 2015


https://hg.python.org/cpython/rev/d9a89c9137d2
changeset:   98622:d9a89c9137d2
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Fri Oct 09 22:43:24 2015 +0200
summary:
  Issue #25349: Optimize bytes % int

Optimize bytes.__mod__(args) for integere formats: %d (%i, %u), %o, %x and %X.
_PyBytesWriter is now used to format directly the integer into the writer
buffer, instead of using a temporary bytes object.

Formatting is between 30% and 50% faster on a microbenchmark.

files:
  Include/longobject.h  |    7 +
  Objects/bytesobject.c |   36 +++++++++
  Objects/longobject.c  |  120 +++++++++++++++++++++++------
  3 files changed, 136 insertions(+), 27 deletions(-)


diff --git a/Include/longobject.h b/Include/longobject.h
--- a/Include/longobject.h
+++ b/Include/longobject.h
@@ -182,6 +182,13 @@
     int base,
     int alternate);
 
+PyAPI_FUNC(char*) _PyLong_FormatBytesWriter(
+    _PyBytesWriter *writer,
+    char *str,
+    PyObject *obj,
+    int base,
+    int alternate);
+
 /* Format the object based on the format_spec, as defined in PEP 3101
    (Advanced String Formatting). */
 PyAPI_FUNC(int) _PyLong_FormatAdvancedWriter(
diff --git a/Objects/bytesobject.c b/Objects/bytesobject.c
--- a/Objects/bytesobject.c
+++ b/Objects/bytesobject.c
@@ -841,6 +841,42 @@
             case 'o':
             case 'x':
             case 'X':
+                if (PyLong_CheckExact(v)
+                        && width == -1 && prec == -1
+                        && !(flags & (F_SIGN | F_BLANK))
+                        && c != 'X')
+                {
+                    /* Fast path */
+                    int alternate = flags & F_ALT;
+                    int base;
+
+                    switch(c)
+                    {
+                        default:
+                            assert(0 && "'type' not in [diuoxX]");
+                        case 'd':
+                        case 'i':
+                        case 'u':
+                            base = 10;
+                            break;
+                        case 'o':
+                            base = 8;
+                            break;
+                        case 'x':
+                        case 'X':
+                            base = 16;
+                            break;
+                    }
+
+                    /* Fast path */
+                    writer.min_size -= 2; /* size preallocated by "%d" */
+                    res = _PyLong_FormatBytesWriter(&writer, res,
+                                                    v, base, alternate);
+                    if (res == NULL)
+                        goto error;
+                    continue;
+                }
+
                 temp = formatlong(v, flags, prec, c);
                 if (!temp)
                     goto error;
diff --git a/Objects/longobject.c b/Objects/longobject.c
--- a/Objects/longobject.c
+++ b/Objects/longobject.c
@@ -1582,7 +1582,9 @@
 static int
 long_to_decimal_string_internal(PyObject *aa,
                                 PyObject **p_output,
-                                _PyUnicodeWriter *writer)
+                                _PyUnicodeWriter *writer,
+                                _PyBytesWriter *bytes_writer,
+                                char **bytes_str)
 {
     PyLongObject *scratch, *a;
     PyObject *str;
@@ -1664,6 +1666,13 @@
         kind = writer->kind;
         str = NULL;
     }
+    else if (bytes_writer) {
+        *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, strlen);
+        if (*bytes_str == NULL) {
+            Py_DECREF(scratch);
+            return -1;
+        }
+    }
     else {
         str = PyUnicode_New(strlen, '9');
         if (str == NULL) {
@@ -1673,13 +1682,8 @@
         kind = PyUnicode_KIND(str);
     }
 
-#define WRITE_DIGITS(TYPE)                                            \
+#define WRITE_DIGITS(p)                                               \
     do {                                                              \
-        if (writer)                                                   \
-            p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
-        else                                                          \
-            p = (TYPE*)PyUnicode_DATA(str) + strlen;                  \
-                                                                      \
         /* pout[0] through pout[size-2] contribute exactly            \
            _PyLong_DECIMAL_SHIFT digits each */                       \
         for (i=0; i < size - 1; i++) {                                \
@@ -1699,6 +1703,16 @@
         /* and sign */                                                \
         if (negative)                                                 \
             *--p = '-';                                               \
+    } while (0)
+
+#define WRITE_UNICODE_DIGITS(TYPE)                                    \
+    do {                                                              \
+        if (writer)                                                   \
+            p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + strlen; \
+        else                                                          \
+            p = (TYPE*)PyUnicode_DATA(str) + strlen;                  \
+                                                                      \
+        WRITE_DIGITS(p);                                              \
                                                                       \
         /* check we've counted correctly */                           \
         if (writer)                                                   \
@@ -1708,25 +1722,34 @@
     } while (0)
 
     /* fill the string right-to-left */
-    if (kind == PyUnicode_1BYTE_KIND) {
+    if (bytes_writer) {
+        char *p = *bytes_str + strlen;
+        WRITE_DIGITS(p);
+        assert(p == *bytes_str);
+    }
+    else if (kind == PyUnicode_1BYTE_KIND) {
         Py_UCS1 *p;
-        WRITE_DIGITS(Py_UCS1);
+        WRITE_UNICODE_DIGITS(Py_UCS1);
     }
     else if (kind == PyUnicode_2BYTE_KIND) {
         Py_UCS2 *p;
-        WRITE_DIGITS(Py_UCS2);
+        WRITE_UNICODE_DIGITS(Py_UCS2);
     }
     else {
         Py_UCS4 *p;
         assert (kind == PyUnicode_4BYTE_KIND);
-        WRITE_DIGITS(Py_UCS4);
+        WRITE_UNICODE_DIGITS(Py_UCS4);
     }
 #undef WRITE_DIGITS
+#undef WRITE_UNICODE_DIGITS
 
     Py_DECREF(scratch);
     if (writer) {
         writer->pos += strlen;
     }
+    else if (bytes_writer) {
+        (*bytes_str) += strlen;
+    }
     else {
         assert(_PyUnicode_CheckConsistency(str, 1));
         *p_output = (PyObject *)str;
@@ -1738,7 +1761,7 @@
 long_to_decimal_string(PyObject *aa)
 {
     PyObject *v;
-    if (long_to_decimal_string_internal(aa, &v, NULL) == -1)
+    if (long_to_decimal_string_internal(aa, &v, NULL, NULL, NULL) == -1)
         return NULL;
     return v;
 }
@@ -1750,7 +1773,8 @@
 
 static int
 long_format_binary(PyObject *aa, int base, int alternate,
-                   PyObject **p_output, _PyUnicodeWriter *writer)
+                   PyObject **p_output, _PyUnicodeWriter *writer,
+                   _PyBytesWriter *bytes_writer, char **bytes_str)
 {
     PyLongObject *a = (PyLongObject *)aa;
     PyObject *v;
@@ -1812,6 +1836,11 @@
         kind = writer->kind;
         v = NULL;
     }
+    else if (writer) {
+        *bytes_str = _PyBytesWriter_Prepare(bytes_writer, *bytes_str, sz);
+        if (*bytes_str == NULL)
+            return -1;
+    }
     else {
         v = PyUnicode_New(sz, 'x');
         if (v == NULL)
@@ -1819,13 +1848,8 @@
         kind = PyUnicode_KIND(v);
     }
 
-#define WRITE_DIGITS(TYPE)                                              \
+#define WRITE_DIGITS(p)                                                 \
     do {                                                                \
-        if (writer)                                                     \
-            p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
-        else                                                            \
-            p = (TYPE*)PyUnicode_DATA(v) + sz;                          \
-                                                                        \
         if (size_a == 0) {                                              \
             *--p = '0';                                                 \
         }                                                               \
@@ -1860,30 +1884,50 @@
         }                                                               \
         if (negative)                                                   \
             *--p = '-';                                                 \
+    } while (0)
+
+#define WRITE_UNICODE_DIGITS(TYPE)                                      \
+    do {                                                                \
+        if (writer)                                                     \
+            p = (TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos + sz; \
+        else                                                            \
+            p = (TYPE*)PyUnicode_DATA(v) + sz;                          \
+                                                                        \
+        WRITE_DIGITS(p);                                                \
+                                                                        \
         if (writer)                                                     \
             assert(p == ((TYPE*)PyUnicode_DATA(writer->buffer) + writer->pos)); \
         else                                                            \
             assert(p == (TYPE*)PyUnicode_DATA(v));                      \
     } while (0)
 
-    if (kind == PyUnicode_1BYTE_KIND) {
+    if (bytes_writer) {
+        char *p = *bytes_str + sz;
+        WRITE_DIGITS(p);
+        assert(p == *bytes_str);
+    }
+    else if (kind == PyUnicode_1BYTE_KIND) {
         Py_UCS1 *p;
-        WRITE_DIGITS(Py_UCS1);
+        WRITE_UNICODE_DIGITS(Py_UCS1);
     }
     else if (kind == PyUnicode_2BYTE_KIND) {
         Py_UCS2 *p;
-        WRITE_DIGITS(Py_UCS2);
+        WRITE_UNICODE_DIGITS(Py_UCS2);
     }
     else {
         Py_UCS4 *p;
         assert (kind == PyUnicode_4BYTE_KIND);
-        WRITE_DIGITS(Py_UCS4);
+        WRITE_UNICODE_DIGITS(Py_UCS4);
     }
 #undef WRITE_DIGITS
+#undef WRITE_UNICODE_DIGITS
 
     if (writer) {
         writer->pos += sz;
     }
+    else if (bytes_writer) {
+        (*bytes_str) += sz;
+    }
     else {
         assert(_PyUnicode_CheckConsistency(v, 1));
         *p_output = v;
@@ -1897,9 +1941,9 @@
     PyObject *str;
     int err;
     if (base == 10)
-        err = long_to_decimal_string_internal(obj, &str, NULL);
+        err = long_to_decimal_string_internal(obj, &str, NULL, NULL, NULL);
     else
-        err = long_format_binary(obj, base, 1, &str, NULL);
+        err = long_format_binary(obj, base, 1, &str, NULL, NULL, NULL);
     if (err == -1)
         return NULL;
     return str;
@@ -1911,9 +1955,31 @@
                      int base, int alternate)
 {
     if (base == 10)
-        return long_to_decimal_string_internal(obj, NULL, writer);
+        return long_to_decimal_string_internal(obj, NULL, writer,
+                                               NULL, NULL);
     else
-        return long_format_binary(obj, base, alternate, NULL, writer);
+        return long_format_binary(obj, base, alternate, NULL, writer,
+                                  NULL, NULL);
+}
+
+char*
+_PyLong_FormatBytesWriter(_PyBytesWriter *writer, char *str,
+                          PyObject *obj,
+                          int base, int alternate)
+{
+    char *str2;
+    int res;
+    str2 = str;
+    if (base == 10)
+        res = long_to_decimal_string_internal(obj, NULL, NULL,
+                                              writer, &str2);
+    else
+        res = long_format_binary(obj, base, alternate, NULL, NULL,
+                                 writer, &str2);
+    if (res < 0)
+        return NULL;
+    assert(str2 != NULL);
+    return str2;
 }
 
 /* Table of digit values for 8-bit string -> integer conversion.

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list