[Python-checkins] r86036 - in python/branches/py3k: Doc/library/operator.rst Lib/test/test_operator.py Misc/NEWS Modules/operator.c

antoine.pitrou python-checkins at python.org
Sun Oct 31 16:26:04 CET 2010


Author: antoine.pitrou
Date: Sun Oct 31 16:26:04 2010
New Revision: 86036

Log:
Issue #10160: Speed up operator.attrgetter.  Patch by Christos Georgiou.




Modified:
   python/branches/py3k/Doc/library/operator.rst
   python/branches/py3k/Lib/test/test_operator.py
   python/branches/py3k/Misc/NEWS
   python/branches/py3k/Modules/operator.c

Modified: python/branches/py3k/Doc/library/operator.rst
==============================================================================
--- python/branches/py3k/Doc/library/operator.rst	(original)
+++ python/branches/py3k/Doc/library/operator.rst	Sun Oct 31 16:26:04 2010
@@ -336,6 +336,8 @@
    b.date)``.  Equivalent to::
 
       def attrgetter(*items):
+          if any(not isinstance(item, str) for item in items):
+              raise TypeError('attribute name must be a string')
           if len(items) == 1:
               attr = items[0]
               def g(obj):

Modified: python/branches/py3k/Lib/test/test_operator.py
==============================================================================
--- python/branches/py3k/Lib/test/test_operator.py	(original)
+++ python/branches/py3k/Lib/test/test_operator.py	Sun Oct 31 16:26:04 2010
@@ -275,8 +275,7 @@
         self.assertEqual(f(a), 'arthur')
         f = operator.attrgetter('rank')
         self.assertRaises(AttributeError, f, a)
-        f = operator.attrgetter(2)
-        self.assertRaises(TypeError, f, a)
+        self.assertRaises(TypeError, operator.attrgetter, 2)
         self.assertRaises(TypeError, operator.attrgetter)
 
         # multiple gets
@@ -285,7 +284,7 @@
         record.y = 'Y'
         record.z = 'Z'
         self.assertEqual(operator.attrgetter('x','z','y')(record), ('X', 'Z', 'Y'))
-        self.assertRaises(TypeError, operator.attrgetter('x', (), 'y'), record)
+        self.assertRaises(TypeError, operator.attrgetter, ('x', (), 'y'))
 
         class C(object):
             def __getattr__(self, name):
@@ -304,6 +303,10 @@
         self.assertEqual(f(a), ('arthur', 'thomas'))
         f = operator.attrgetter('name', 'child.name', 'child.child.name')
         self.assertRaises(AttributeError, f, a)
+        f = operator.attrgetter('child.')
+        self.assertRaises(AttributeError, f, a)
+        f = operator.attrgetter('.child')
+        self.assertRaises(AttributeError, f, a)
 
         a.child.child = A()
         a.child.child.name = 'johnson'

Modified: python/branches/py3k/Misc/NEWS
==============================================================================
--- python/branches/py3k/Misc/NEWS	(original)
+++ python/branches/py3k/Misc/NEWS	Sun Oct 31 16:26:04 2010
@@ -59,6 +59,8 @@
 Library
 -------
 
+- Issue #10160: Speed up operator.attrgetter.  Patch by Christos Georgiou.
+
 - logging: Added style option to basicConfig() to allow %, {} or $-formatting.
 
 - Issue #5729:  json.dumps() now supports using a string such as '\t'

Modified: python/branches/py3k/Modules/operator.c
==============================================================================
--- python/branches/py3k/Modules/operator.c	(original)
+++ python/branches/py3k/Modules/operator.c	Sun Oct 31 16:26:04 2010
@@ -383,7 +383,7 @@
 {
     attrgetterobject *ag;
     PyObject *attr;
-    Py_ssize_t nattrs;
+    Py_ssize_t nattrs, idx, char_idx;
 
     if (!_PyArg_NoKeywords("attrgetter()", kwds))
         return NULL;
@@ -392,15 +392,92 @@
     if (nattrs <= 1) {
         if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &attr))
             return NULL;
-    } else
-        attr = args;
+    }
+
+    attr = PyTuple_New(nattrs);
+    if (attr == NULL)
+        return NULL;
+
+    /* prepare attr while checking args */
+    for (idx = 0; idx < nattrs; ++idx) {
+        PyObject *item = PyTuple_GET_ITEM(args, idx);
+        Py_ssize_t item_len;
+        Py_UNICODE *item_buffer;
+        int dot_count;
+
+        if (!PyUnicode_Check(item)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "attribute name must be a string");
+            Py_DECREF(attr);
+            return NULL;
+        }
+        item_len = PyUnicode_GET_SIZE(item);
+        item_buffer = PyUnicode_AS_UNICODE(item);
+
+        /* check whether the string is dotted */
+        dot_count = 0;
+        for (char_idx = 0; char_idx < item_len; ++char_idx) {
+            if (item_buffer[char_idx] == (Py_UNICODE)'.')
+                ++dot_count;
+        }
+
+        if (dot_count == 0) {
+            Py_INCREF(item);
+            PyUnicode_InternInPlace(&item);
+            PyTuple_SET_ITEM(attr, idx, item);
+        } else { /* make it a tuple of non-dotted attrnames */
+            PyObject *attr_chain = PyTuple_New(dot_count + 1);
+            PyObject *attr_chain_item;
+
+            if (attr_chain == NULL) {
+                Py_DECREF(attr);
+                return NULL;
+            }
+
+            Py_ssize_t unibuff_from = 0;
+            Py_ssize_t unibuff_till = 0;
+            Py_ssize_t attr_chain_idx = 0;
+            for (; dot_count > 0; --dot_count) {
+                while (item_buffer[unibuff_till] != (Py_UNICODE)'.') {
+                    ++unibuff_till;
+                }
+                attr_chain_item = PyUnicode_FromUnicode(
+                                      item_buffer + unibuff_from,
+                                      unibuff_till - unibuff_from);
+                if (attr_chain_item == NULL) {
+                    Py_DECREF(attr_chain);
+                    Py_DECREF(attr);
+                    return NULL;
+                }
+                PyUnicode_InternInPlace(&attr_chain_item);
+                PyTuple_SET_ITEM(attr_chain, attr_chain_idx, attr_chain_item);
+                ++attr_chain_idx;
+                unibuff_till = unibuff_from = unibuff_till + 1;
+            }
+
+            /* now add the last dotless name */
+            attr_chain_item = PyUnicode_FromUnicode(
+                                  item_buffer + unibuff_from,
+                                  item_len - unibuff_from);
+            if (attr_chain_item == NULL) {
+                Py_DECREF(attr_chain);
+                Py_DECREF(attr);
+                return NULL;
+            }
+            PyUnicode_InternInPlace(&attr_chain_item);
+            PyTuple_SET_ITEM(attr_chain, attr_chain_idx, attr_chain_item);
+
+            PyTuple_SET_ITEM(attr, idx, attr_chain);
+        }
+    }
 
     /* create attrgetterobject structure */
     ag = PyObject_GC_New(attrgetterobject, &attrgetter_type);
-    if (ag == NULL)
+    if (ag == NULL) {
+        Py_DECREF(attr);
         return NULL;
+    }
 
-    Py_INCREF(attr);
     ag->attr = attr;
     ag->nattrs = nattrs;
 
@@ -426,33 +503,31 @@
 static PyObject *
 dotted_getattr(PyObject *obj, PyObject *attr)
 {
-    char *s, *p;
+    PyObject *newobj;
 
-    if (!PyUnicode_Check(attr)) {
-        PyErr_SetString(PyExc_TypeError,
-                        "attribute name must be a string");
-        return NULL;
-    }
-
-    s = _PyUnicode_AsString(attr);
-    Py_INCREF(obj);
-    for (;;) {
-        PyObject *newobj, *str;
-        p = strchr(s, '.');
-        str = p ? PyUnicode_FromStringAndSize(s, (p-s)) :
-              PyUnicode_FromString(s);
-        if (str == NULL) {
+    /* attr is either a tuple or instance of str.
+       Ensured by the setup code of attrgetter_new */
+    if (PyTuple_CheckExact(attr)) { /* chained getattr */
+        Py_ssize_t name_idx = 0, name_count;
+        PyObject *attr_name;
+
+        name_count = PyTuple_GET_SIZE(attr);
+        Py_INCREF(obj);
+        for (name_idx = 0; name_idx < name_count; ++name_idx) {
+            attr_name = PyTuple_GET_ITEM(attr, name_idx);
+            newobj = PyObject_GetAttr(obj, attr_name);
             Py_DECREF(obj);
-            return NULL;
+            if (newobj == NULL) {
+                return NULL;
+            }
+            /* here */
+            obj = newobj;
         }
-        newobj = PyObject_GetAttr(obj, str);
-        Py_DECREF(str);
-        Py_DECREF(obj);
+    } else { /* single getattr */
+        newobj = PyObject_GetAttr(obj, attr);
         if (newobj == NULL)
             return NULL;
         obj = newobj;
-        if (p == NULL) break;
-        s = p+1;
     }
 
     return obj;
@@ -466,8 +541,8 @@
 
     if (!PyArg_UnpackTuple(args, "attrgetter", 1, 1, &obj))
         return NULL;
-    if (ag->nattrs == 1)
-        return dotted_getattr(obj, ag->attr);
+    if (ag->nattrs == 1) /* ag->attr is always a tuple */
+        return dotted_getattr(obj, PyTuple_GET_ITEM(ag->attr, 0));
 
     assert(PyTuple_Check(ag->attr));
     assert(PyTuple_GET_SIZE(ag->attr) == nattrs);


More information about the Python-checkins mailing list