[Python-checkins] gh-91247: Use memcpy for list and tuple repeat (#91482)
sweeneyde
webhook-mailer at python.org
Mon Jul 25 22:11:05 EDT 2022
https://github.com/python/cpython/commit/2ef73be891eb95064e268341e38e81d008add480
commit: 2ef73be891eb95064e268341e38e81d008add480
branch: main
author: Pieter Eendebak <pieter.eendebak at gmail.com>
committer: sweeneyde <36520290+sweeneyde at users.noreply.github.com>
date: 2022-07-25T22:10:23-04:00
summary:
gh-91247: Use memcpy for list and tuple repeat (#91482)
* Add _Py_memory_repeat function to pycore_list
* Add _Py_RefcntAdd function to pycore_object
* Use the new functions in tuplerepeat, list_repeat, and list_inplace_repeat
files:
A Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst
M Include/internal/pycore_list.h
M Include/internal/pycore_object.h
M Objects/listobject.c
M Objects/tupleobject.c
diff --git a/Include/internal/pycore_list.h b/Include/internal/pycore_list.h
index 7721f6c2e222a..691d13bc8d9ff 100644
--- a/Include/internal/pycore_list.h
+++ b/Include/internal/pycore_list.h
@@ -56,6 +56,19 @@ _PyList_AppendTakeRef(PyListObject *self, PyObject *newitem)
return _PyList_AppendTakeRefListResize(self, newitem);
}
+// Repeat the bytes of a buffer in place
+static inline void
+_Py_memory_repeat(char* dest, Py_ssize_t len_dest, Py_ssize_t len_src)
+{
+ assert(len_src > 0);
+ Py_ssize_t copied = len_src;
+ while (copied < len_dest) {
+ Py_ssize_t bytes_to_copy = Py_MIN(copied, len_dest - copied);
+ memcpy(dest + copied, dest, bytes_to_copy);
+ copied += bytes_to_copy;
+ }
+}
+
typedef struct {
PyObject_HEAD
Py_ssize_t it_index;
diff --git a/Include/internal/pycore_object.h b/Include/internal/pycore_object.h
index 1bb2da71704dd..c7490799d816a 100644
--- a/Include/internal/pycore_object.h
+++ b/Include/internal/pycore_object.h
@@ -37,6 +37,16 @@ PyAPI_FUNC(void) _Py_NO_RETURN _Py_FatalRefcountErrorFunc(
#define _Py_FatalRefcountError(message) \
_Py_FatalRefcountErrorFunc(__func__, (message))
+// Increment reference count by n
+static inline void _Py_RefcntAdd(PyObject* op, Py_ssize_t n)
+{
+#ifdef Py_REF_DEBUG
+ _Py_RefTotal += n;
+#endif
+ op->ob_refcnt += n;
+}
+#define _Py_RefcntAdd(op, n) _Py_RefcntAdd(_PyObject_CAST(op), n)
+
static inline void
_Py_DECREF_SPECIALIZED(PyObject *op, const destructor destruct)
{
diff --git a/Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst b/Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst
new file mode 100644
index 0000000000000..72a39b5ebc710
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2022-03-22-13-12-27.bpo-47091.tJcy-P.rst
@@ -0,0 +1 @@
+Improve performance of repetition of :class:`list` and :class:`tuple` by using ``memcpy`` to copy data and performing the reference increments in one step.
diff --git a/Objects/listobject.c b/Objects/listobject.c
index 822954bee432f..9afa68f9fe100 100644
--- a/Objects/listobject.c
+++ b/Objects/listobject.c
@@ -551,47 +551,41 @@ list_concat(PyListObject *a, PyObject *bb)
static PyObject *
list_repeat(PyListObject *a, Py_ssize_t n)
{
- Py_ssize_t size;
- PyListObject *np;
- if (n < 0)
- n = 0;
- if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n)
- return PyErr_NoMemory();
- size = Py_SIZE(a) * n;
- if (size == 0)
+ const Py_ssize_t input_size = Py_SIZE(a);
+ if (input_size == 0 || n <= 0)
return PyList_New(0);
- np = (PyListObject *) list_new_prealloc(size);
+ assert(n > 0);
+
+ if (input_size > PY_SSIZE_T_MAX / n)
+ return PyErr_NoMemory();
+ Py_ssize_t output_size = input_size * n;
+
+ PyListObject *np = (PyListObject *) list_new_prealloc(output_size);
if (np == NULL)
return NULL;
+
PyObject **dest = np->ob_item;
- PyObject **dest_end = dest + size;
- if (Py_SIZE(a) == 1) {
+ if (input_size == 1) {
PyObject *elem = a->ob_item[0];
- Py_SET_REFCNT(elem, Py_REFCNT(elem) + n);
-#ifdef Py_REF_DEBUG
- _Py_RefTotal += n;
-#endif
+ _Py_RefcntAdd(elem, n);
+ PyObject **dest_end = dest + output_size;
while (dest < dest_end) {
*dest++ = elem;
}
}
else {
PyObject **src = a->ob_item;
- PyObject **src_end = src + Py_SIZE(a);
+ PyObject **src_end = src + input_size;
while (src < src_end) {
- Py_SET_REFCNT(*src, Py_REFCNT(*src) + n);
-#ifdef Py_REF_DEBUG
- _Py_RefTotal += n;
-#endif
- *dest++ = *src++;
- }
- // Now src chases after dest in the same buffer
- src = np->ob_item;
- while (dest < dest_end) {
+ _Py_RefcntAdd(*src, n);
*dest++ = *src++;
}
+
+ _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
+ sizeof(PyObject *)*input_size);
}
- Py_SET_SIZE(np, size);
+
+ Py_SET_SIZE(np, output_size);
return (PyObject *) np;
}
@@ -743,12 +737,8 @@ PyList_SetSlice(PyObject *a, Py_ssize_t ilow, Py_ssize_t ihigh, PyObject *v)
static PyObject *
list_inplace_repeat(PyListObject *self, Py_ssize_t n)
{
- PyObject **items;
- Py_ssize_t size, i, j, p;
-
-
- size = PyList_GET_SIZE(self);
- if (size == 0 || n == 1) {
+ Py_ssize_t input_size = PyList_GET_SIZE(self);
+ if (input_size == 0 || n == 1) {
Py_INCREF(self);
return (PyObject *)self;
}
@@ -759,22 +749,21 @@ list_inplace_repeat(PyListObject *self, Py_ssize_t n)
return (PyObject *)self;
}
- if (size > PY_SSIZE_T_MAX / n) {
+ if (input_size > PY_SSIZE_T_MAX / n) {
return PyErr_NoMemory();
}
+ Py_ssize_t output_size = input_size * n;
- if (list_resize(self, size*n) < 0)
+ if (list_resize(self, output_size) < 0)
return NULL;
- p = size;
- items = self->ob_item;
- for (i = 1; i < n; i++) { /* Start counting at 1, not 0 */
- for (j = 0; j < size; j++) {
- PyObject *o = items[j];
- Py_INCREF(o);
- items[p++] = o;
- }
+ PyObject **items = self->ob_item;
+ for (Py_ssize_t j = 0; j < input_size; j++) {
+ _Py_RefcntAdd(items[j], n-1);
}
+ _Py_memory_repeat((char *)items, sizeof(PyObject *)*output_size,
+ sizeof(PyObject *)*input_size);
+
Py_INCREF(self);
return (PyObject *)self;
}
diff --git a/Objects/tupleobject.c b/Objects/tupleobject.c
index dfb8597b876e5..240af0a907527 100644
--- a/Objects/tupleobject.c
+++ b/Objects/tupleobject.c
@@ -495,9 +495,8 @@ tupleconcat(PyTupleObject *a, PyObject *bb)
static PyObject *
tuplerepeat(PyTupleObject *a, Py_ssize_t n)
{
- Py_ssize_t size;
- PyTupleObject *np;
- if (Py_SIZE(a) == 0 || n == 1) {
+ const Py_ssize_t input_size = Py_SIZE(a);
+ if (input_size == 0 || n == 1) {
if (PyTuple_CheckExact(a)) {
/* Since tuples are immutable, we can return a shared
copy in this case */
@@ -505,42 +504,38 @@ tuplerepeat(PyTupleObject *a, Py_ssize_t n)
return (PyObject *)a;
}
}
- if (Py_SIZE(a) == 0 || n <= 0) {
+ if (input_size == 0 || n <= 0) {
return tuple_get_empty();
}
- if (n > PY_SSIZE_T_MAX / Py_SIZE(a))
+ assert(n>0);
+
+ if (input_size > PY_SSIZE_T_MAX / n)
return PyErr_NoMemory();
- size = Py_SIZE(a) * n;
- np = tuple_alloc(size);
+ Py_ssize_t output_size = input_size * n;
+
+ PyTupleObject *np = tuple_alloc(output_size);
if (np == NULL)
return NULL;
+
PyObject **dest = np->ob_item;
- PyObject **dest_end = dest + size;
- if (Py_SIZE(a) == 1) {
+ if (input_size == 1) {
PyObject *elem = a->ob_item[0];
- Py_SET_REFCNT(elem, Py_REFCNT(elem) + n);
-#ifdef Py_REF_DEBUG
- _Py_RefTotal += n;
-#endif
+ _Py_RefcntAdd(elem, n);
+ PyObject **dest_end = dest + output_size;
while (dest < dest_end) {
*dest++ = elem;
}
}
else {
PyObject **src = a->ob_item;
- PyObject **src_end = src + Py_SIZE(a);
+ PyObject **src_end = src + input_size;
while (src < src_end) {
- Py_SET_REFCNT(*src, Py_REFCNT(*src) + n);
-#ifdef Py_REF_DEBUG
- _Py_RefTotal += n;
-#endif
- *dest++ = *src++;
- }
- // Now src chases after dest in the same buffer
- src = np->ob_item;
- while (dest < dest_end) {
+ _Py_RefcntAdd(*src, n);
*dest++ = *src++;
}
+
+ _Py_memory_repeat((char *)np->ob_item, sizeof(PyObject *)*output_size,
+ sizeof(PyObject *)*input_size);
}
_PyObject_GC_TRACK(np);
return (PyObject *) np;
More information about the Python-checkins
mailing list