[Python-3000] patch: bytes object PyBUF_LOCKDATA read-only and immutable support

Gregory P. Smith greg at krypto.org
Thu Aug 30 01:47:28 CEST 2007


Attached is what I've come up with so far.  Only a single field is
added to the PyBytesObject struct.  This adds support to the bytes
object for PyBUF_LOCKDATA buffer API operation.  bytes objects can be
marked temporarily read-only for use while the buffer api has handed
them off to something which may run without the GIL (think IO).  Any
attempt to modify them during that time will raise an exception as I
believe Martin suggested earlier.

As an added bonus because its been discussed here, support for setting
a bytes object immutable has been added since its pretty trivial once
the read only export support was in place.  Thats not required but was
trivial to include.

I'd appreciate any feedback.

My TODO list for this patch:

 0. Get feedback and make adjustments as necessary.

 1. Deciding between PyBUF_SIMPLE and PyBUF_WRITEABLE for the internal
    uses of the _getbuffer() function.  bytesobject.c contains both readonly
    and read-write uses of the buffers, i'll add boolean parameter for
    that.

 2. More testing: a few tests in the test suite fail after this but the
    number was low and I haven't had time to look at why or what the
    failures were.

 3. Exporting methods suggested in the TODO at the top of the file.

 4. Unit tests for all of the functionality this adds.

NOTE: after these changes I had to make clean and rm -rf build before
things would not segfault on import.  I suspect some things (modules?)
were not properly recompiled after the bytesobject.h struct change
otherwise.

-gps

-------------- next part --------------
Index: Include/bytesobject.h
===================================================================
--- Include/bytesobject.h	(revision 57679)
+++ Include/bytesobject.h	(working copy)
@@ -17,17 +17,18 @@
  * For the convenience of C programmers, the bytes type is considered
  * to contain a char pointer, not an unsigned char pointer.
  */
 
 /* Object layout */
 typedef struct {
     PyObject_VAR_HEAD
     /* XXX(nnorwitz): should ob_exports be Py_ssize_t? */
-    int ob_exports; /* how many buffer exports */
+    int ob_exports; /* How many buffer exports */
+    int ob_readonly_exports; /* How many buffer exports as readonly */
     Py_ssize_t ob_alloc; /* How many bytes allocated */
     char *ob_bytes;
 } PyBytesObject;
 
 /* Type object */
 PyAPI_DATA(PyTypeObject) PyBytes_Type;
 
 /* Type check macros */
Index: Objects/bytesobject.c
===================================================================
--- Objects/bytesobject.c	(revision 57679)
+++ Objects/bytesobject.c	(working copy)
@@ -1,16 +1,156 @@
 /* Bytes object implementation */
 
 /* XXX TO DO: optimizations */
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
 #include "structmember.h"
 
+/*
+ * Constants for use with the PyBytesObject.ob_readonly_exports.
+ */
+#define IMMUTABLE               (INT_MAX)
+#define MAX_READONLY_EXPORTS    (INT_MAX-1)
+
+/* 
+ * Should we bounds check PyBytesObject.ob_exports and
+ * ob_readonly_exports when we increment them?
+ */
+#if MAX_READONLY_EXPORTS <= USHRT_MAX
+#define BOUNDS_CHECK_EXPORTS 1
+#else
+#undef BOUNDS_CHECK_EXPORTS
+#endif
+
+/*
+ * XXX(gps): I added support for immutability because it was a trivial
+ * addition to the work I was already doing to add PyBUF_READLOCK
+ * support to bytes objects.  It isn't required but is included as an
+ * example to decide if it should stay.
+ *
+ * TODO(gps) Do we want to provide an exported interface for any of 
+ * these inlines for use by C code that uses Bytes objects directly
+ * rather than the buffer API?  I suggest C code should prefer to use
+ * the buffer API (though it is heavier weight).
+ * 
+ *   APIs I think should be public in C and Python:
+ *     is_readonly
+ *     set_immutable  &  is_immutable
+ */
+
+/*
+ * Set a bytes object to be immutable.  If outstanding non-readonly
+ * exports exist this will raise an error instead.  Once immutable,
+ * always immutable.  This cannot be undone.
+ *
+ * Returns: 0 on success, 1 on failure with an exception set.
+ */
+Py_LOCAL_INLINE(int) set_immutable(PyBytesObject *obj)
+{
+    if (obj->ob_exports > 0) {
+        PyErr_SetString(PyExc_BufferError,
+                "bytes with outstanding non-readonly exports"
+                "cannot be made immutable.");
+        return 1;
+    }
+    obj->ob_readonly_exports = IMMUTABLE;
+    return 0;
+}
+
+/*
+ * Is this bytes object immutable?  0: no, 1: yes
+ */
+Py_LOCAL_INLINE(int) is_immutable(PyBytesObject *obj)
+{
+    return obj->ob_readonly_exports == IMMUTABLE;
+}
+
+/*
+ * Is this bytes object currently read only?  0: no, 1: yes
+ */
+Py_LOCAL_INLINE(int) is_readonly(PyBytesObject *obj)
+{
+    assert(is_immutable(obj) || obj->ob_readonly_exports <= obj->ob_exports);
+    return (obj->ob_readonly_exports > 0 && obj->ob_exports == 0);
+}
+
+/*
+ * Increment the export count.  For use by getbuffer.
+ *
+ * Returns: 0 on success, -1 on failure with an exception set.
+ * (-1 matches the required buffer API getbuffer return value)
+ */
+Py_LOCAL_INLINE(int) inc_exports(PyBytesObject *obj)
+{
+    obj->ob_exports++;
+#ifdef BOUNDS_CHECK_EXPORTS
+    if (obj->ob_exports <= 0) {
+        PyErr_SetString(PyExc_RuntimeError,
+                "ob_exports integer overflow");
+        obj->ob_exports--;
+        return -1;
+    }
+#endif
+    return 0;
+}
+
+/*
+ * Decrement the export count.  For use by releasebuffer.
+ */
+Py_LOCAL_INLINE(void) dec_exports(PyBytesObject *obj)
+{
+    obj->ob_exports--;
+}
+
+
+/*
+ * Increment the readonly export count if the object is mutable.
+ * Must be called with the GIL held.
+ *
+ * For use by the buffer API to implement PyBUF_LOCKDATA requests.
+ */
+Py_LOCAL_INLINE(void) inc_readonly_exports(PyBytesObject *obj)
+{
+#ifdef BOUNDS_CHECK_EXPORTS
+    if (obj->ob_readonly_exports == MAX_READONLY_EXPORTS) {
+        /* XXX(gps): include object id in this warning? */
+        PyErr_WarnEx(PyExc_RuntimeWarning,
+            "readonly_exports overflow on bytes object; "
+            "marking it immutable.", 1);
+        obj->ob_readonly_exports = IMMUTABLE;
+    }
+#endif
+    /* 
+     * NOTE: Even if the above check isn't made, the values are such that
+     * incrementing ob_readonly_exports past the max value will cause it
+     * to become immutable (as a partial safety feature).
+     */
+    if (obj->ob_readonly_exports != IMMUTABLE) {
+        obj->ob_readonly_exports++;
+    }
+}
+
+
+/*
+ * Decrement the readonly export count.
+ * Must be called with the GIL held.
+ *
+ * For use by the buffer API to implement PyBUF_LOCKDATA requests.
+ */
+Py_LOCAL_INLINE(void) dec_readonly_exports(PyBytesObject *obj)
+{
+    assert(is_immutable(obj) || obj->ob_readonly_exports <= obj->ob_exports);
+    if (obj->ob_readonly_exports != IMMUTABLE) {
+        obj->ob_readonly_exports--;
+    }
+}
+
+
 /* The nullbytes are used by the stringlib during partition.
  * If partition is removed from bytes, nullbytes and its helper
  * Init/Fini should also be removed.
  */
 static PyBytesObject *nullbytes = NULL;
 
 void
 PyBytes_Fini(void)
@@ -49,35 +189,44 @@
     return 1;
 }
 
 static int
 bytes_getbuffer(PyBytesObject *obj, PyBuffer *view, int flags)
 {
         int ret;
         void *ptr;
+        int readonly = 0;
         if (view == NULL) {
-                obj->ob_exports++;
-                return 0;
+                return inc_exports(obj);
         }
         if (obj->ob_bytes == NULL)
                 ptr = "";
         else
                 ptr = obj->ob_bytes;
-        ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), 0, flags);
+        if (((flags & PyBUF_LOCKDATA) == PyBUF_LOCKDATA) &&
+            obj->ob_exports == 0) {
+                inc_readonly_exports(obj);
+                readonly = -1;
+        } else {
+                readonly = is_readonly(obj);
+        }
+        ret = PyBuffer_FillInfo(view, ptr, Py_Size(obj), readonly, flags);
         if (ret >= 0) {
-                obj->ob_exports++;
+                return inc_exports(obj);
         }
         return ret;
 }
 
 static void
 bytes_releasebuffer(PyBytesObject *obj, PyBuffer *view)
 {
-        obj->ob_exports--;
+        dec_exports(obj);
+        if (view && view->readonly == -1)
+                dec_readonly_exports(obj);
 }
 
 static Py_ssize_t
 _getbuffer(PyObject *obj, PyBuffer *view)
 {
     PyBufferProcs *buffer = Py_Type(obj)->tp_as_buffer;
 
     if (buffer == NULL ||
@@ -85,16 +234,20 @@
         buffer->bf_getbuffer == NULL)
     {
         PyErr_Format(PyExc_TypeError,
                      "Type %.100s doesn't support the buffer API",
                      Py_Type(obj)->tp_name);
         return -1;
     }
 
+    /* 
+     * TODO(gps): make this PyBUF_WRITEABLE?  or just verify sanity on
+     * our own before calling if the GIL is not being relinquished?
+     */
     if (buffer->bf_getbuffer(obj, view, PyBUF_SIMPLE) < 0)
             return -1;
     return view->len;
 }
 
 /* Direct API functions */
 
 PyObject *
@@ -151,26 +304,40 @@
 PyBytes_AsString(PyObject *self)
 {
     assert(self != NULL);
     assert(PyBytes_Check(self));
 
     return PyBytes_AS_STRING(self);
 }
 
+#define SET_RO_ERROR(bo)  do {  \
+            if (is_immutable((PyBytesObject *)(bo)))  \
+                PyErr_SetString(PyExc_BufferError,  \
+                    "Immutable flag set: object cannot be modified"); \
+            else \
+                PyErr_SetString(PyExc_BufferError, \
+                    "Readonly export exists: object cannot be modified"); \
+        } while (0);
+
 int
 PyBytes_Resize(PyObject *self, Py_ssize_t size)
 {
     void *sval;
     Py_ssize_t alloc = ((PyBytesObject *)self)->ob_alloc;
 
     assert(self != NULL);
     assert(PyBytes_Check(self));
     assert(size >= 0);
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return -1;
+    }
+
     if (size < alloc / 2) {
         /* Major downsize; resize down to exact size */
         alloc = size + 1;
     }
     else if (size < alloc) {
         /* Within allocated size; quick exit */
         Py_Size(self) = size;
         ((PyBytesObject *)self)->ob_bytes[size] = '\0'; /* Trailing null */
@@ -275,17 +442,23 @@
     }
 
     mysize = Py_Size(self);
     size = mysize + vo.len;
     if (size < 0) {
         PyObject_ReleaseBuffer(other, &vo);
         return PyErr_NoMemory();
     }
+
     if (size < self->ob_alloc) {
+        if (is_readonly((PyBytesObject *)self)) {
+            SET_RO_ERROR(self);
+            PyObject_ReleaseBuffer(other, &vo);
+            return NULL;
+        }
         Py_Size(self) = size;
         self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
     }
     else if (PyBytes_Resize((PyObject *)self, size) < 0) {
         PyObject_ReleaseBuffer(other, &vo);
         return NULL;
     }
     memcpy(self->ob_bytes + mysize, vo.buf, vo.len);
@@ -327,17 +500,22 @@
     Py_ssize_t size;
 
     if (count < 0)
         count = 0;
     mysize = Py_Size(self);
     size = mysize * count;
     if (count != 0 && size / count != mysize)
         return PyErr_NoMemory();
+
     if (size < self->ob_alloc) {
+        if (is_readonly((PyBytesObject *)self)) {
+            SET_RO_ERROR(self);
+            return NULL;
+        }
         Py_Size(self) = size;
         self->ob_bytes[Py_Size(self)] = '\0'; /* Trailing null byte */
     }
     else if (PyBytes_Resize((PyObject *)self, size) < 0)
         return NULL;
 
     if (mysize == 1)
         memset(self->ob_bytes, self->ob_bytes[0], size);
@@ -487,16 +665,22 @@
                                  "can't set bytes slice from %.100s",
                                  Py_Type(values)->tp_name);
                     return -1;
             }
             needed = vbytes.len;
             bytes = vbytes.buf;
     }
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        res = -1;
+        goto finish;
+    }
+
     if (lo < 0)
         lo = 0;
     if (hi < lo)
         hi = lo;
     if (hi > Py_Size(self))
         hi = Py_Size(self);
 
     avail = hi - lo;
@@ -553,16 +737,21 @@
     if (i < 0 || i >= Py_Size(self)) {
         PyErr_SetString(PyExc_IndexError, "bytes index out of range");
         return -1;
     }
 
     if (value == NULL)
         return bytes_setslice(self, i, i+1, NULL);
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return -1;
+    }
+
     ival = PyNumber_AsSsize_t(value, PyExc_ValueError);
     if (ival == -1 && PyErr_Occurred())
         return -1;
 
     if (ival < 0 || ival >= 256) {
         PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)");
         return -1;
     }
@@ -572,16 +761,21 @@
 }
 
 static int
 bytes_ass_subscript(PyBytesObject *self, PyObject *item, PyObject *values)
 {
     Py_ssize_t start, stop, step, slicelen, needed;
     char *bytes;
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return -1;
+    }
+
     if (PyIndex_Check(item)) {
         Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
 
         if (i == -1 && PyErr_Occurred())
             return -1;
 
         if (i < 0)
             i += PyBytes_GET_SIZE(self);
@@ -1335,16 +1529,18 @@
 PyDoc_STRVAR(translate__doc__,
 "B.translate(table [,deletechars]) -> bytes\n\
 \n\
 Return a copy of the bytes B, where all characters occurring\n\
 in the optional argument deletechars are removed, and the\n\
 remaining characters have been mapped through the given\n\
 translation table, which must be a bytes of length 256.");
 
+/* XXX(gps): bytes could also use an in place bytes_itranslate method? */
+
 static PyObject *
 bytes_translate(PyBytesObject *self, PyObject *args)
 {
     register char *input, *output;
     register const char *table;
     register Py_ssize_t i, c, changed = 0;
     PyObject *input_obj = (PyObject*)self;
     const char *table1, *output_start, *del_table=NULL;
@@ -2030,16 +2226,18 @@
 
 PyDoc_STRVAR(replace__doc__,
 "B.replace (old, new[, count]) -> bytes\n\
 \n\
 Return a copy of bytes B with all occurrences of subsection\n\
 old replaced by new.  If the optional argument count is\n\
 given, only the first count occurrences are replaced.");
 
+/* XXX(gps): bytes could also use an in place bytes_ireplace method? */
+
 static PyObject *
 bytes_replace(PyBytesObject *self, PyObject *args)
 {
     Py_ssize_t count = -1;
     PyObject *from, *to, *res;
     PyBuffer vfrom, vto;
 
     if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
@@ -2382,16 +2580,21 @@
 \n\
 Reverse the order of the values in bytes in place.");
 static PyObject *
 bytes_reverse(PyBytesObject *self, PyObject *unused)
 {
     char swap, *head, *tail;
     Py_ssize_t i, j, n = Py_Size(self);
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return NULL;
+    }
+
     j = n / 2;
     head = self->ob_bytes;
     tail = head + n - 1;
     for (i = 0; i < j; i++) {
         swap = *head;
         *head++ = *tail;
         *tail-- = swap;
     }
@@ -2407,16 +2610,21 @@
 bytes_insert(PyBytesObject *self, PyObject *args)
 {
     int value;
     Py_ssize_t where, n = Py_Size(self);
 
     if (!PyArg_ParseTuple(args, "ni:insert", &where, &value))
         return NULL;
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return NULL;
+    }
+
     if (n == PY_SSIZE_T_MAX) {
         PyErr_SetString(PyExc_OverflowError,
                         "cannot add more objects to bytes");
         return NULL;
     }
     if (value < 0 || value >= 256) {
         PyErr_SetString(PyExc_ValueError,
                         "byte must be in range(0, 256)");
@@ -2472,16 +2680,21 @@
 bytes_pop(PyBytesObject *self, PyObject *args)
 {
     int value;
     Py_ssize_t where = -1, n = Py_Size(self);
 
     if (!PyArg_ParseTuple(args, "|n:pop", &where))
         return NULL;
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return NULL;
+    }
+
     if (n == 0) {
         PyErr_SetString(PyExc_OverflowError,
                         "cannot pop an empty bytes");
         return NULL;
     }
     if (where < 0)
         where += Py_Size(self);
     if (where < 0 || where >= Py_Size(self)) {
@@ -2505,16 +2718,21 @@
 bytes_remove(PyBytesObject *self, PyObject *arg)
 {
     int value;
     Py_ssize_t where, n = Py_Size(self);
 
     if (! _getbytevalue(arg, &value))
         return NULL;
 
+    if (is_readonly((PyBytesObject *)self)) {
+        SET_RO_ERROR(self);
+        return NULL;
+    }
+
     for (where = 0; where < n; where++) {
         if (self->ob_bytes[where] == value)
             break;
     }
     if (where == n) {
         PyErr_SetString(PyExc_ValueError, "value not found in bytes");
         return NULL;
     }
@@ -2783,16 +3001,18 @@
 
   error:
     Py_DECREF(newbytes);
     return NULL;
 }
 
 PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
 
+/* XXX(gps): should is_immutable() be in the pickle? */
+
 static PyObject *
 bytes_reduce(PyBytesObject *self)
 {
     PyObject *latin1;
     if (self->ob_bytes)
         latin1 = PyUnicode_DecodeLatin1(self->ob_bytes,
                                         Py_Size(self), NULL);
     else


More information about the Python-3000 mailing list