[Python-checkins] bpo-32492: 1.6x speed up in namedtuple attribute access using C fast-path (#10495)

Raymond Hettinger webhook-mailer at python.org
Sun Dec 30 04:24:14 EST 2018


https://github.com/python/cpython/commit/3f5fc70c6213008243e7d605f7d8a2d8f94cf919
commit: 3f5fc70c6213008243e7d605f7d8a2d8f94cf919
branch: master
author: Pablo Galindo <Pablogsal at gmail.com>
committer: Raymond Hettinger <rhettinger at users.noreply.github.com>
date: 2018-12-30T01:24:03-08:00
summary:

bpo-32492: 1.6x speed up in namedtuple attribute access using C fast-path (#10495)

* bpo-32492: 2.5x speed up in namedtuple attribute access using C fast path

* Add News entry

* fixup! bpo-32492: 2.5x speed up in namedtuple attribute access using C fast path

* Check for tuple in the __get__ of the new descriptor and don't cache the descriptor itself

* Don't inherit from property. Implement GC methods to handle __doc__

* Add a test for the docstring substitution in descriptors

* Update NEWS entry to reflect time against 3.7 branch

* Simplify implementation with argument clinic, better error messages, only __new__

* Use positional-only parameters for the __new__

* Use PyTuple_GET_SIZE and PyTuple_GET_ITEM to tighter the implementation of tuplegetterdescr_get

* Implement __set__ to make tuplegetter a data descriptor

* Use Py_INCREF now that we inline PyTuple_GetItem

* Apply the valid_index() function, saving one test

* Move Py_None test out of the critical path.

files:
A Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst
A Modules/clinic/_collectionsmodule.c.h
M Lib/collections/__init__.py
M Lib/test/test_collections.py
M Modules/_collectionsmodule.c

diff --git a/Lib/collections/__init__.py b/Lib/collections/__init__.py
index 4724b0edf33a..0b74c3f8915f 100644
--- a/Lib/collections/__init__.py
+++ b/Lib/collections/__init__.py
@@ -311,6 +311,11 @@ def __eq__(self, other):
 ### namedtuple
 ################################################################################
 
+try:
+    from _collections import _tuplegetter
+except ImportError:
+    _tuplegetter = lambda index, doc: property(_itemgetter(index), doc=doc)
+
 _nt_itemgetters = {}
 
 def namedtuple(typename, field_names, *, rename=False, defaults=None, module=None):
@@ -454,12 +459,13 @@ def __getnewargs__(self):
     cache = _nt_itemgetters
     for index, name in enumerate(field_names):
         try:
-            itemgetter_object, doc = cache[index]
+            doc = cache[index]
         except KeyError:
-            itemgetter_object = _itemgetter(index)
             doc = f'Alias for field number {index}'
-            cache[index] = itemgetter_object, doc
-        class_namespace[name] = property(itemgetter_object, doc=doc)
+            cache[index] = doc
+
+        tuplegetter_object = _tuplegetter(index, doc)
+        class_namespace[name] = tuplegetter_object
 
     result = type(typename, (tuple,), class_namespace)
 
diff --git a/Lib/test/test_collections.py b/Lib/test/test_collections.py
index e2ffaaafd454..c0815947f908 100644
--- a/Lib/test/test_collections.py
+++ b/Lib/test/test_collections.py
@@ -514,6 +514,14 @@ class Point(namedtuple('_Point', ['x', 'y'])):
         a.w = 5
         self.assertEqual(a.__dict__, {'w': 5})
 
+    def test_namedtuple_can_mutate_doc_of_descriptors_independently(self):
+        A = namedtuple('A', 'x y')
+        B = namedtuple('B', 'x y')
+        A.x.__doc__ = 'foo'
+        B.x.__doc__ = 'bar'
+        self.assertEqual(A.x.__doc__, 'foo')
+        self.assertEqual(B.x.__doc__, 'bar')
+
 
 ################################################################################
 ### Abstract Base Classes
diff --git a/Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst b/Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst
new file mode 100644
index 000000000000..24682b14f36d
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2018-11-13-01-03-10.bpo-32492.voIdcp.rst	
@@ -0,0 +1,2 @@
+Speed up :class:`namedtuple` attribute access by 1.6x using a C fast-path
+for the name descriptors. Patch by Pablo Galindo.
diff --git a/Modules/_collectionsmodule.c b/Modules/_collectionsmodule.c
index 4da0662f0c7f..cc325e1aaac4 100644
--- a/Modules/_collectionsmodule.c
+++ b/Modules/_collectionsmodule.c
@@ -7,6 +7,14 @@
 #include <sys/types.h>          /* For size_t */
 #endif
 
+/*[clinic input]
+class _tuplegetter "_tuplegetterobject *" "&tuplegetter_type"
+[clinic start generated code]*/
+/*[clinic end generated code: output=da39a3ee5e6b4b0d input=ee5ed5baabe35068]*/
+
+static PyTypeObject tuplegetter_type;
+#include "clinic/_collectionsmodule.c.h"
+
 /* collections module implementation of a deque() datatype
    Written and maintained by Raymond D. Hettinger <python at rcn.com>
 */
@@ -2328,6 +2336,156 @@ _count_elements(PyObject *self, PyObject *args)
     Py_RETURN_NONE;
 }
 
+/* Helper functions for namedtuples */
+
+typedef struct {
+    PyObject_HEAD
+    Py_ssize_t index;
+    PyObject* doc;
+} _tuplegetterobject;
+
+/*[clinic input]
+ at classmethod
+_tuplegetter.__new__ as tuplegetter_new
+
+    index: Py_ssize_t
+    doc: object
+    /
+[clinic start generated code]*/
+
+static PyObject *
+tuplegetter_new_impl(PyTypeObject *type, Py_ssize_t index, PyObject *doc)
+/*[clinic end generated code: output=014be444ad80263f input=87c576a5bdbc0bbb]*/
+{
+    _tuplegetterobject* self;
+    self = (_tuplegetterobject *)type->tp_alloc(type, 0);
+    if (self == NULL) {
+        return NULL;
+    }
+    self->index = index;
+    Py_INCREF(doc);
+    self->doc = doc;
+    return (PyObject *)self;
+}
+
+static PyObject *
+tuplegetterdescr_get(PyObject *self, PyObject *obj, PyObject *type)
+{
+    PyObject *result;
+    if (obj == NULL) {
+        Py_INCREF(self);
+        return self;
+    }
+    if (!PyTuple_Check(obj)) {
+        if (obj == Py_None) {
+            Py_INCREF(self);
+            return self;
+        }
+        PyErr_Format(PyExc_TypeError,
+                     "descriptor for index '%d' for tuple subclasses "
+                     "doesn't apply to '%s' object",
+                     ((_tuplegetterobject*)self)->index,
+                     obj->ob_type->tp_name);
+        return NULL;
+    }
+
+    Py_ssize_t index = ((_tuplegetterobject*)self)->index;
+
+    if (!valid_index(index, PyTuple_GET_SIZE(obj))) {
+        PyErr_SetString(PyExc_IndexError, "tuple index out of range");
+        return NULL;
+    }
+
+    result = PyTuple_GET_ITEM(obj, index);
+    Py_INCREF(result);
+    return result;
+}
+
+static int
+tuplegetter_set(PyObject *self, PyObject *obj, PyObject *value)
+{
+    if (value == NULL) {
+        PyErr_SetString(PyExc_AttributeError, "can't delete attribute");
+    } else {
+        PyErr_SetString(PyExc_AttributeError, "can't set attribute");
+    }
+    return -1;
+}
+
+static int
+tuplegetter_traverse(PyObject *self, visitproc visit, void *arg)
+{
+    _tuplegetterobject *tuplegetter = (_tuplegetterobject *)self;
+    Py_VISIT(tuplegetter->doc);
+    return 0;
+}
+
+static int
+tuplegetter_clear(PyObject *self)
+{
+    _tuplegetterobject *tuplegetter = (_tuplegetterobject *)self;
+    Py_CLEAR(tuplegetter->doc);
+    return 0;
+}
+
+static void
+tuplegetter_dealloc(_tuplegetterobject *self)
+{
+    PyObject_GC_UnTrack(self);
+    tuplegetter_clear((PyObject*)self);
+    Py_TYPE(self)->tp_free((PyObject*)self);
+}
+
+
+static PyMemberDef tuplegetter_members[] = {
+    {"__doc__",  T_OBJECT, offsetof(_tuplegetterobject, doc), 0},
+    {0}
+};
+
+static PyTypeObject tuplegetter_type = {
+    PyVarObject_HEAD_INIT(NULL, 0)
+    "_collections._tuplegetter",                /* tp_name */
+    sizeof(_tuplegetterobject),                 /* tp_basicsize */
+    0,                                          /* tp_itemsize */
+    /* methods */
+    (destructor)tuplegetter_dealloc,            /* tp_dealloc */
+    0,                                          /* tp_print */
+    0,                                          /* tp_getattr */
+    0,                                          /* tp_setattr */
+    0,                                          /* tp_reserved */
+    0,                                          /* tp_repr */
+    0,                                          /* tp_as_number */
+    0,                                          /* tp_as_sequence */
+    0,                                          /* tp_as_mapping */
+    0,                                          /* tp_hash */
+    0,                                          /* tp_call */
+    0,                                          /* tp_str */
+    0,                                          /* tp_getattro */
+    0,                                          /* tp_setattro */
+    0,                                          /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,    /* tp_flags */
+    0,                                          /* tp_doc */
+    (traverseproc)tuplegetter_traverse,         /* tp_traverse */
+    (inquiry)tuplegetter_clear,                 /* tp_clear */
+    0,                                          /* tp_richcompare */
+    0,                                          /* tp_weaklistoffset */
+    0,                                          /* tp_iter */
+    0,                                          /* tp_iternext */
+    0,                                          /* tp_methods */
+    tuplegetter_members,                        /* tp_members */
+    0,                                          /* tp_getset */
+    0,                                          /* tp_base */
+    0,                                          /* tp_dict */
+    tuplegetterdescr_get,                       /* tp_descr_get */
+    tuplegetter_set,                            /* tp_descr_set */
+    0,                                          /* tp_dictoffset */
+    0,                                          /* tp_init */
+    0,                                          /* tp_alloc */
+    tuplegetter_new,                            /* tp_new */
+    0,
+};
+
+
 /* module level code ********************************************************/
 
 PyDoc_STRVAR(module_doc,
@@ -2386,5 +2544,10 @@ PyInit__collections(void)
     Py_INCREF(&dequereviter_type);
     PyModule_AddObject(m, "_deque_reverse_iterator", (PyObject *)&dequereviter_type);
 
+    if (PyType_Ready(&tuplegetter_type) < 0)
+        return NULL;
+    Py_INCREF(&tuplegetter_type);
+    PyModule_AddObject(m, "_tuplegetter", (PyObject *)&tuplegetter_type);
+
     return m;
 }
diff --git a/Modules/clinic/_collectionsmodule.c.h b/Modules/clinic/_collectionsmodule.c.h
new file mode 100644
index 000000000000..12626c1f6618
--- /dev/null
+++ b/Modules/clinic/_collectionsmodule.c.h
@@ -0,0 +1,28 @@
+/*[clinic input]
+preserve
+[clinic start generated code]*/
+
+static PyObject *
+tuplegetter_new_impl(PyTypeObject *type, Py_ssize_t index, PyObject *doc);
+
+static PyObject *
+tuplegetter_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
+{
+    PyObject *return_value = NULL;
+    Py_ssize_t index;
+    PyObject *doc;
+
+    if ((type == &tuplegetter_type) &&
+        !_PyArg_NoKeywords("_tuplegetter", kwargs)) {
+        goto exit;
+    }
+    if (!PyArg_ParseTuple(args, "nO:_tuplegetter",
+        &index, &doc)) {
+        goto exit;
+    }
+    return_value = tuplegetter_new_impl(type, index, doc);
+
+exit:
+    return return_value;
+}
+/*[clinic end generated code: output=83746071eacc28d3 input=a9049054013a1b77]*/



More information about the Python-checkins mailing list