[issue15522] improve 27 percent performance on stringobject.c (by prefetch and loop optimization)

abael report at bugs.python.org
Wed Aug 1 11:33:52 CEST 2012


New submission from abael:

Python-2.7.3/Objects/stringobject.c (SHA256SUM ad7795c75e2a25247e4dea4cc5327c225c4da03b7c7d57226c817ba6d12a316c)
static PyObject *string_join(PyStringObject *self, PyObject *orig);

OLD IMPLEMENTATION LOGIC (pseudo-code):
        char *sep = PyString_AS_STRING(self);
        const Py_ssize_t seplen = PyString_GET_SIZE(self);

        seq = PySequence_Fast(orig, "");
        seqlen = PySequence_Size(seq);

        if (seqlen == 0)
            return PyString_FromString("");
        else if (seqlen == 1)return the exactly first one item;
        else{
            for (i = 0; i < seqlen; i++) {
                const size_t old_sz = sz;
                item = PySequence_Fast_GET_ITEM(seq, i);
                if (!PyString_Check(item)){
                   if ( Py_USING_UNICODE and PyUnicode_Check(item))
                        return PyUnicode_Join((PyObject *)self, seq);
                   else  PyErr_Format(...);
                }
                sz += PyString_GET_SIZE(item);

                if (i != 0)
                    sz += seplen;
            }
        }

        /* Allocate result space. */
        res = PyString_FromStringAndSize((char*)NULL, sz);

        /* Catenate everything. */
        p = PyString_AS_STRING(res);
        for (i = 0; i < seqlen; ++i) {
            size_t n;
            item = PySequence_Fast_GET_ITEM(seq, i);
            n = PyString_GET_SIZE(item);
            Py_MEMCPY(p, PyString_AS_STRING(item), n);
            p += n;
            if (i < seqlen - 1) {
                Py_MEMCPY(p, sep, seplen);
                p += seplen;
            }
        }




Abael's IMPLEMENTATION LOGIC:
        char *sep = PyString_AS_STRING(self);
        const Py_ssize_t seplen = PyString_GET_SIZE(self);

        seq = PySequence_Fast(orig, "");
        seqlen = PySequence_Size(seq);

        if (seqlen == 0)
            return PyString_FromString("");
        if (seqlen == 1)
            return the first (and only) item;

        if (seqlen <0)return NULL

         /**** PREFETCH start: get the size of the first item, since at this point we can assume seqlen >= 2 ****/
        register size_t sz=0;
        register size_t old_sz=0;
        PyObject *res = NULL;

        item = PySequence_Fast_GET_ITEM(seq, 0);
        if (!PyString_Check(item)){
           if ( Py_USING_UNICODE and PyUnicode_Check(item))
                return PyUnicode_Join((PyObject *)self, seq);
           else  PyErr_Format(...);
        }

        sz += PyString_GET_SIZE(item);
        if (sz < old_sz || sz > PY_SSIZE_T_MAX) PyErr_SetString(PyExc_OverflowError,"join() result is too long for a Python string");
         /**** PREFETCH end: get the size of the first item, since at this point we can assume seqlen >= 2 ****/

        register Py_ssize_t i;
        for (i=1; i < seqlen; i++) { /**** then here we can loop start from 1 ****/
            const size_t old_sz = sz;
            item = PySequence_Fast_GET_ITEM(seq, i);
            if (!PyString_Check(item)){
               if ( Py_USING_UNICODE and PyUnicode_Check(item))
                    return PyUnicode_Join((PyObject *)self, seq);
               else  PyErr_Format(...);
            }
            sz += PyString_GET_SIZE(item);
            sz += seplen; /**** now we don't need to test (i != 0) every loop ****/
        }

        /* Allocate result space. */
        res = PyString_FromStringAndSize((char*)NULL, sz);

        /* Catenate everything. */
        /**** PREFETCH start: memcpy the first item up front, since at this point we can assume seqlen >= 2 ****/
        register char *p = PyString_AS_STRING(res);
        item = PySequence_Fast_GET_ITEM(seq, 0);
        sz = PyString_GET_SIZE(item);
        Py_MEMCPY(p, PyString_AS_STRING(item),sz);
        p += sz;
        /**** PREFETCH end: memcpy the first item up front, since at this point we can assume seqlen >= 2 ****/

        for (i=1; i<seqlen; ++i){ /**** here we also loop start from 1 ****/
            item = PySequence_Fast_GET_ITEM(seq, i);
            sz = PyString_GET_SIZE(item);
            Py_MEMCPY(p, sep, seplen); /**** avoid test (i < seqlen - 1) each loop in old implement ****/ 
            p += seplen;
            Py_MEMCPY(p, PyString_AS_STRING(item),sz);
            p += sz;
        }
        return res;

----------
components: Library (Lib)
messages: 167107
nosy: abael
priority: normal
severity: normal
status: open
title: improve 27 percent performance on stringobject.c (by prefetch and loop optimization)
type: performance
versions: Python 2.7

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue15522>
_______________________________________


More information about the Python-bugs-list mailing list