[Python-checkins] bpo-36365: Rewrite structseq_repr() using _PyUnicodeWriter (GH-12440)

Victor Stinner webhook-mailer at python.org
Tue Mar 19 19:05:55 EDT 2019


https://github.com/python/cpython/commit/c70ab02df2894c34da2223fc3798c0404b41fd79
commit: c70ab02df2894c34da2223fc3798c0404b41fd79
branch: master
author: Victor Stinner <vstinner at redhat.com>
committer: GitHub <noreply at github.com>
date: 2019-03-20T00:05:51+01:00
summary:

bpo-36365: Rewrite structseq_repr() using _PyUnicodeWriter (GH-12440)

No longer limit repr(structseq) to 512 bytes. Use _PyUnicodeWriter
for better performance and to write Unicode directly rather than
encoding the repr() value to UTF-8 and then decoding it from UTF-8.

files:
A Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst
M Objects/structseq.c

diff --git a/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst b/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst
new file mode 100644
index 000000000000..206de56f08fb
--- /dev/null
+++ b/Misc/NEWS.d/next/Core and Builtins/2019-03-19-15-58-23.bpo-36365.jHaErz.rst	
@@ -0,0 +1 @@
+repr(structseq) is no longer limited to 512 bytes.
diff --git a/Objects/structseq.c b/Objects/structseq.c
index 1c37845950fe..5278313ffdce 100644
--- a/Objects/structseq.c
+++ b/Objects/structseq.c
@@ -168,78 +168,93 @@ structseq_new_impl(PyTypeObject *type, PyObject *arg, PyObject *dict)
+/* Return repr(obj) formatted as "typename(name=repr, name=repr, ...)",
+   covering the VISIBLE_SIZE(obj) leading members of the type.
+   Return NULL on failure. */
 static PyObject *
 structseq_repr(PyStructSequence *obj)
 {
-    /* buffer and type size were chosen well considered. */
-#define REPR_BUFFER_SIZE 512
-#define TYPE_MAXSIZE 100
-
     PyTypeObject *typ = Py_TYPE(obj);
-    Py_ssize_t i;
-    int removelast = 0;
-    Py_ssize_t len;
-    char buf[REPR_BUFFER_SIZE];
-    char *endofbuf, *pbuf = buf;
-
-    /* pointer to end of writeable buffer; safes space for "...)\0" */
-    endofbuf= &buf[REPR_BUFFER_SIZE-5];
-
-    /* "typename(", limited to  TYPE_MAXSIZE */
-    len = strlen(typ->tp_name);
-    len = Py_MIN(len, TYPE_MAXSIZE);
-    memcpy(pbuf, typ->tp_name, len);
-    pbuf += len;
-    *pbuf++ = '(';
-
-    for (i=0; i < VISIBLE_SIZE(obj); i++) {
-        PyObject *val, *repr;
-        const char *cname, *crepr;
-
-        cname = typ->tp_members[i].name;
-        if (cname == NULL) {
+    _PyUnicodeWriter writer;
+
+    /* Write "typename(" */
+    PyObject *type_name = PyUnicode_DecodeUTF8(typ->tp_name,
+                                               strlen(typ->tp_name),
+                                               NULL);
+    if (type_name == NULL) {
+        /* writer is not initialized yet: "goto error" would call
+           _PyUnicodeWriter_Dealloc() on an uninitialized struct. */
+        return NULL;
+    }
+
+    _PyUnicodeWriter_Init(&writer);
+    writer.overallocate = 1;
+    /* count 5 characters per item: "x=1, " */
+    writer.min_length = (PyUnicode_GET_LENGTH(type_name) + 1
+                         + VISIBLE_SIZE(obj) * 5 + 1);
+
+    if (_PyUnicodeWriter_WriteStr(&writer, type_name) < 0) {
+        Py_DECREF(type_name);
+        goto error;
+    }
+    Py_DECREF(type_name);
+
+    if (_PyUnicodeWriter_WriteChar(&writer, '(') < 0) {
+        goto error;
+    }
+
+    for (Py_ssize_t i=0; i < VISIBLE_SIZE(obj); i++) {
+        if (i > 0) {
+            /* Write ", " */
+            if (_PyUnicodeWriter_WriteASCIIString(&writer, ", ", 2) < 0) {
+                goto error;
+            }
+        }
+
+        /* Write "name=repr" */
+        const char *name_utf8 = typ->tp_members[i].name;
+        if (name_utf8 == NULL) {
             PyErr_Format(PyExc_SystemError, "In structseq_repr(), member %zd name is NULL"
                          " for type %.500s", i, typ->tp_name);
-            return NULL;
+            goto error;
         }
-        val = PyStructSequence_GET_ITEM(obj, i);
-        repr = PyObject_Repr(val);
-        if (repr == NULL)
-            return NULL;
-        crepr = PyUnicode_AsUTF8(repr);
-        if (crepr == NULL) {
-            Py_DECREF(repr);
-            return NULL;
+
+        PyObject *name = PyUnicode_DecodeUTF8(name_utf8, strlen(name_utf8), NULL);
+        if (name == NULL) {
+            goto error;
+        }
+        if (_PyUnicodeWriter_WriteStr(&writer, name) < 0) {
+            Py_DECREF(name);
+            goto error;
         }
+        Py_DECREF(name);
 
-        /* + 3: keep space for "=" and ", " */
-        len = strlen(cname) + strlen(crepr) + 3;
-        if ((pbuf+len) <= endofbuf) {
-            strcpy(pbuf, cname);
-            pbuf += strlen(cname);
-            *pbuf++ = '=';
-            strcpy(pbuf, crepr);
-            pbuf += strlen(crepr);
-            *pbuf++ = ',';
-            *pbuf++ = ' ';
-            removelast = 1;
-            Py_DECREF(repr);
+        if (_PyUnicodeWriter_WriteChar(&writer, '=') < 0) {
+            goto error;
         }
-        else {
-            strcpy(pbuf, "...");
-            pbuf += 3;
-            removelast = 0;
+
+        PyObject *value = PyStructSequence_GET_ITEM(obj, i);
+        assert(value != NULL);
+        PyObject *repr = PyObject_Repr(value);
+        if (repr == NULL) {
+            goto error;
+        }
+        if (_PyUnicodeWriter_WriteStr(&writer, repr) < 0) {
             Py_DECREF(repr);
-            break;
+            goto error;
         }
+        Py_DECREF(repr);
     }
-    if (removelast) {
-        /* overwrite last ", " */
-        pbuf-=2;
+
+    if (_PyUnicodeWriter_WriteChar(&writer, ')') < 0) {
+        goto error;
     }
-    *pbuf++ = ')';
-    *pbuf = '\0';
 
-    return PyUnicode_FromString(buf);
+    return _PyUnicodeWriter_Finish(&writer);
+
+error:
+    _PyUnicodeWriter_Dealloc(&writer);
+    return NULL;
 }
 
+
 static PyObject *
 structseq_reduce(PyStructSequence* self, PyObject *Py_UNUSED(ignored))
 {



More information about the Python-checkins mailing list