[Python-checkins] gh-69093: Add indexing and slicing support to sqlite3.Blob (#91599)

JelleZijlstra webhook-mailer at python.org
Thu Apr 21 21:45:21 EDT 2022


https://github.com/python/cpython/commit/29afb7d2efed6ee48a67dafdc1a1f34dd60153cf
commit: 29afb7d2efed6ee48a67dafdc1a1f34dd60153cf
branch: main
author: Erlend Egeberg Aasland <erlend.aasland at innova.no>
committer: JelleZijlstra <jelle.zijlstra at gmail.com>
date: 2022-04-21T18:45:16-07:00
summary:

gh-69093: Add indexing and slicing support to sqlite3.Blob (#91599)

Authored-by: Aviv Palivoda <palaviv at gmail.com>
Co-authored-by: Erlend E. Aasland <erlend.aasland at innova.no>

files:
A Misc/NEWS.d/next/Library/2022-04-14-01-00-31.gh-issue-69093.bmlMwI.rst
M Doc/includes/sqlite3/blob.py
M Doc/library/sqlite3.rst
M Lib/test/test_sqlite3/test_dbapi.py
M Modules/_sqlite/blob.c

diff --git a/Doc/includes/sqlite3/blob.py b/Doc/includes/sqlite3/blob.py
index b3694ad08af46..d947059b3ae64 100644
--- a/Doc/includes/sqlite3/blob.py
+++ b/Doc/includes/sqlite3/blob.py
@@ -2,15 +2,18 @@
 
 con = sqlite3.connect(":memory:")
 con.execute("create table test(blob_col blob)")
-con.execute("insert into test(blob_col) values (zeroblob(10))")
+con.execute("insert into test(blob_col) values (zeroblob(13))")
 
 # Write to our blob, using two write operations:
 with con.blobopen("test", "blob_col", 1) as blob:
-    blob.write(b"Hello")
-    blob.write(b"World")
+    blob.write(b"hello, ")
+    blob.write(b"world.")
+    # Modify the first and last bytes of our blob
+    blob[0] = b"H"
+    blob[-1] = b"!"
 
 # Read the contents of our blob
 with con.blobopen("test", "blob_col", 1) as blob:
     greeting = blob.read()
 
-print(greeting)  # outputs "b'HelloWorld'"
+print(greeting)  # outputs "b'Hello, world!'"
diff --git a/Doc/library/sqlite3.rst b/Doc/library/sqlite3.rst
index cbe7bb1fb9a0d..69e77e922a9ab 100644
--- a/Doc/library/sqlite3.rst
+++ b/Doc/library/sqlite3.rst
@@ -1051,9 +1051,10 @@ Blob Objects
 
 .. class:: Blob
 
-   A :class:`Blob` instance is a :term:`file-like object` that can read and write
-   data in an SQLite :abbr:`BLOB (Binary Large OBject)`.  Call ``len(blob)`` to
-   get the size (number of bytes) of the blob.
+   A :class:`Blob` instance is a :term:`file-like object`
+   that can read and write data in an SQLite :abbr:`BLOB (Binary Large OBject)`.
+   Call :func:`len(blob) <len>` to get the size (number of bytes) of the blob.
+   Use indices and :term:`slices <slice>` for direct access to the blob data.
 
    Use the :class:`Blob` as a :term:`context manager` to ensure that the blob
    handle is closed after use.
diff --git a/Lib/test/test_sqlite3/test_dbapi.py b/Lib/test/test_sqlite3/test_dbapi.py
index 79dcb3ef8954a..8bfdce2bbe92e 100644
--- a/Lib/test/test_sqlite3/test_dbapi.py
+++ b/Lib/test/test_sqlite3/test_dbapi.py
@@ -33,7 +33,7 @@
     check_disallow_instantiation,
     threading_helper,
 )
-from _testcapi import INT_MAX
+from _testcapi import INT_MAX, ULLONG_MAX
 from os import SEEK_SET, SEEK_CUR, SEEK_END
 from test.support.os_helper import TESTFN, unlink, temp_dir
 
@@ -1138,6 +1138,13 @@ def test_blob_write_error_length(self):
         with self.assertRaisesRegex(ValueError, "data longer than blob"):
             self.blob.write(b"a" * 1000)
 
+        self.blob.seek(0, SEEK_SET)
+        n = len(self.blob)
+        self.blob.write(b"a" * (n-1))
+        self.blob.write(b"a")
+        with self.assertRaisesRegex(ValueError, "data longer than blob"):
+            self.blob.write(b"a")
+
     def test_blob_write_error_row_changed(self):
         self.cx.execute("update test set b='aaaa' where rowid=1")
         with self.assertRaises(sqlite.OperationalError):
@@ -1162,12 +1169,127 @@ def test_blob_open_error(self):
                 with self.assertRaisesRegex(sqlite.OperationalError, regex):
                     self.cx.blobopen(*args, **kwds)
 
+    def test_blob_length(self):
+        self.assertEqual(len(self.blob), 50)
+
+    def test_blob_get_item(self):
+        self.assertEqual(self.blob[5], b"b")
+        self.assertEqual(self.blob[6], b"l")
+        self.assertEqual(self.blob[7], b"o")
+        self.assertEqual(self.blob[8], b"b")
+        self.assertEqual(self.blob[-1], b"!")
+
+    def test_blob_set_item(self):
+        self.blob[0] = b"b"
+        expected = b"b" + self.data[1:]
+        actual = self.cx.execute("select b from test").fetchone()[0]
+        self.assertEqual(actual, expected)
+
+    def test_blob_set_item_with_offset(self):
+        self.blob.seek(0, SEEK_END)
+        self.assertEqual(self.blob.read(), b"")  # verify that we're at EOB
+        self.blob[0] = b"T"
+        self.blob[-1] = b"."
+        self.blob.seek(0, SEEK_SET)
+        expected = b"This blob data string is exactly fifty bytes long."
+        self.assertEqual(self.blob.read(), expected)
+
+    def test_blob_set_buffer_object(self):
+        from array import array
+        self.blob[0] = memoryview(b"1")
+        self.assertEqual(self.blob[0], b"1")
+
+        self.blob[1] = bytearray(b"2")
+        self.assertEqual(self.blob[1], b"2")
+
+        self.blob[2] = array("b", [4])
+        self.assertEqual(self.blob[2], b"\x04")
+
+        self.blob[0:5] = memoryview(b"12345")
+        self.assertEqual(self.blob[0:5], b"12345")
+
+        self.blob[0:5] = bytearray(b"23456")
+        self.assertEqual(self.blob[0:5], b"23456")
+
+        self.blob[0:5] = array("b", [1, 2, 3, 4, 5])
+        self.assertEqual(self.blob[0:5], b"\x01\x02\x03\x04\x05")
+
+    def test_blob_set_item_negative_index(self):
+        self.blob[-1] = b"z"
+        self.assertEqual(self.blob[-1], b"z")
+
+    def test_blob_get_slice(self):
+        self.assertEqual(self.blob[5:14], b"blob data")
+
+    def test_blob_get_empty_slice(self):
+        self.assertEqual(self.blob[5:5], b"")
+
+    def test_blob_get_slice_negative_index(self):
+        self.assertEqual(self.blob[5:-5], self.data[5:-5])
+
+    def test_blob_get_slice_with_skip(self):
+        self.assertEqual(self.blob[0:10:2], b"ti lb")
+
+    def test_blob_set_slice(self):
+        self.blob[0:5] = b"12345"
+        expected = b"12345" + self.data[5:]
+        actual = self.cx.execute("select b from test").fetchone()[0]
+        self.assertEqual(actual, expected)
+
+    def test_blob_set_empty_slice(self):
+        self.blob[0:0] = b""
+        self.assertEqual(self.blob[:], self.data)
+
+    def test_blob_set_slice_with_skip(self):
+        self.blob[0:10:2] = b"12345"
+        actual = self.cx.execute("select b from test").fetchone()[0]
+        expected = b"1h2s3b4o5 " + self.data[10:]
+        self.assertEqual(actual, expected)
+
+    def test_blob_mapping_invalid_index_type(self):
+        msg = "indices must be integers"
+        with self.assertRaisesRegex(TypeError, msg):
+            self.blob[5:5.5]
+        with self.assertRaisesRegex(TypeError, msg):
+            self.blob[1.5]
+        with self.assertRaisesRegex(TypeError, msg):
+            self.blob["a"] = b"b"
+
+    def test_blob_get_item_error(self):
+        dataset = [len(self.blob), 105, -105]
+        for idx in dataset:
+            with self.subTest(idx=idx):
+                with self.assertRaisesRegex(IndexError, "index out of range"):
+                    self.blob[idx]
+        with self.assertRaisesRegex(IndexError, "cannot fit 'int'"):
+            self.blob[ULLONG_MAX]
+
+    def test_blob_set_item_error(self):
+        with self.assertRaisesRegex(ValueError, "must be a single byte"):
+            self.blob[0] = b"multiple"
+        with self.assertRaisesRegex(TypeError, "doesn't support.*deletion"):
+            del self.blob[0]
+        with self.assertRaisesRegex(IndexError, "Blob index out of range"):
+            self.blob[1000] = b"a"
+
+    def test_blob_set_slice_error(self):
+        with self.assertRaisesRegex(IndexError, "wrong size"):
+            self.blob[5:10] = b"a"
+        with self.assertRaisesRegex(IndexError, "wrong size"):
+            self.blob[5:10] = b"a" * 1000
+        with self.assertRaisesRegex(TypeError, "doesn't support.*deletion"):
+            del self.blob[5:10]
+        with self.assertRaisesRegex(ValueError, "step cannot be zero"):
+            self.blob[5:10:0] = b"12345"
+        with self.assertRaises(BufferError):
+            self.blob[5:10] = memoryview(b"abcde")[::2]
+
     def test_blob_sequence_not_supported(self):
-        with self.assertRaises(TypeError):
+        with self.assertRaisesRegex(TypeError, "unsupported operand"):
             self.blob + self.blob
-        with self.assertRaises(TypeError):
+        with self.assertRaisesRegex(TypeError, "unsupported operand"):
             self.blob * 5
-        with self.assertRaises(TypeError):
+        with self.assertRaisesRegex(TypeError, "is not iterable"):
             b"a" in self.blob
 
     def test_blob_context_manager(self):
@@ -1209,6 +1331,14 @@ def test_blob_closed(self):
                 blob.__enter__()
             with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
                 blob.__exit__(None, None, None)
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                len(blob)
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob[0]
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob[0:1]
+            with self.assertRaisesRegex(sqlite.ProgrammingError, msg):
+                blob[0] = b""
 
     def test_blob_closed_db_read(self):
         with memory_database() as cx:
diff --git a/Misc/NEWS.d/next/Library/2022-04-14-01-00-31.gh-issue-69093.bmlMwI.rst b/Misc/NEWS.d/next/Library/2022-04-14-01-00-31.gh-issue-69093.bmlMwI.rst
new file mode 100644
index 0000000000000..4bb8531beeacd
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-14-01-00-31.gh-issue-69093.bmlMwI.rst
@@ -0,0 +1,2 @@
+Add indexing and slicing support to :class:`sqlite3.Blob`. Patch by Aviv Palivoda
+and Erlend E. Aasland.
diff --git a/Modules/_sqlite/blob.c b/Modules/_sqlite/blob.c
index 3f766302d6251..0c57ff8ca4252 100644
--- a/Modules/_sqlite/blob.c
+++ b/Modules/_sqlite/blob.c
@@ -120,8 +120,11 @@ blob_seterror(pysqlite_Blob *self, int rc)
 }
 
 static PyObject *
-inner_read(pysqlite_Blob *self, int length, int offset)
+inner_read(pysqlite_Blob *self, Py_ssize_t length, Py_ssize_t offset)
 {
+    assert(length <= sqlite3_blob_bytes(self->blob));
+    assert(offset <= sqlite3_blob_bytes(self->blob));
+
     PyObject *buffer = PyBytes_FromStringAndSize(NULL, length);
     if (buffer == NULL) {
         return NULL;
@@ -130,7 +133,7 @@ inner_read(pysqlite_Blob *self, int length, int offset)
     char *raw_buffer = PyBytes_AS_STRING(buffer);
     int rc;
     Py_BEGIN_ALLOW_THREADS
-    rc = sqlite3_blob_read(self->blob, raw_buffer, length, offset);
+    rc = sqlite3_blob_read(self->blob, raw_buffer, (int)length, (int)offset);
     Py_END_ALLOW_THREADS
 
     if (rc != SQLITE_OK) {
@@ -181,17 +184,20 @@ blob_read_impl(pysqlite_Blob *self, int length)
 };
 
 static int
-inner_write(pysqlite_Blob *self, const void *buf, Py_ssize_t len, int offset)
+inner_write(pysqlite_Blob *self, const void *buf, Py_ssize_t len,
+            Py_ssize_t offset)
 {
-    int remaining_len = sqlite3_blob_bytes(self->blob) - self->offset;
+    Py_ssize_t blob_len = sqlite3_blob_bytes(self->blob);
+    Py_ssize_t remaining_len = blob_len - offset;
     if (len > remaining_len) {
         PyErr_SetString(PyExc_ValueError, "data longer than blob length");
         return -1;
     }
 
+    assert(offset <= blob_len);
     int rc;
     Py_BEGIN_ALLOW_THREADS
-    rc = sqlite3_blob_write(self->blob, buf, (int)len, offset);
+    rc = sqlite3_blob_write(self->blob, buf, (int)len, (int)offset);
     Py_END_ALLOW_THREADS
 
     if (rc != SQLITE_OK) {
@@ -347,6 +353,192 @@ blob_exit_impl(pysqlite_Blob *self, PyObject *type, PyObject *val,
     Py_RETURN_FALSE;
 }
 
+/* Mapping-protocol length slot: implements len(blob).
+ *
+ * Return the size of the blob in bytes as reported by sqlite3_blob_bytes(),
+ * or -1 with an exception set when check_blob() rejects the blob.
+ */
+static Py_ssize_t
+blob_length(pysqlite_Blob *self)
+{
+    if (!check_blob(self)) {
+        return -1;
+    }
+    return sqlite3_blob_bytes(self->blob);
+}
+
+/* Convert the Python index object 'item' into a byte offset into the blob.
+ *
+ * A negative index counts from the end of the blob.  Return the offset, which
+ * is always in [0, blob size), or -1 with an exception set: IndexError when
+ * the index does not fit a Py_ssize_t or falls outside the blob.
+ */
+static Py_ssize_t
+get_subscript_index(pysqlite_Blob *self, PyObject *item)
+{
+    Py_ssize_t index = PyNumber_AsSsize_t(item, PyExc_IndexError);
+    if (index == -1 && PyErr_Occurred()) {
+        return -1;
+    }
+
+    const Py_ssize_t size = sqlite3_blob_bytes(self->blob);
+    // Normalise "from the end" indices before the range check.
+    const Py_ssize_t offset = (index < 0) ? index + size : index;
+    if (offset >= 0 && offset < size) {
+        return offset;
+    }
+    PyErr_SetString(PyExc_IndexError, "Blob index out of range");
+    return -1;
+}
+
+/* Implement blob[index] for reading.
+ *
+ * Return a new bytes object of length one holding the byte at 'item', or
+ * NULL with an exception set when the index is invalid or the read fails.
+ */
+static PyObject *
+subscript_index(pysqlite_Blob *self, PyObject *item)
+{
+    const Py_ssize_t offset = get_subscript_index(self, item);
+    return (offset < 0) ? NULL : inner_read(self, 1, offset);
+}
+
+/* Resolve the slice object 'item' against the current size of the blob.
+ *
+ * On success store the adjusted bounds in *start, *stop and *step, the number
+ * of selected bytes in *slicelen, and return 0.  Return -1 with an exception
+ * set if the slice cannot be unpacked.
+ */
+static int
+get_slice_info(pysqlite_Blob *self, PyObject *item, Py_ssize_t *start,
+               Py_ssize_t *stop, Py_ssize_t *step, Py_ssize_t *slicelen)
+{
+    const int rc = PySlice_Unpack(item, start, stop, step);
+    if (rc >= 0) {
+        const Py_ssize_t blob_len = sqlite3_blob_bytes(self->blob);
+        *slicelen = PySlice_AdjustIndices(blob_len, start, stop, *step);
+        return 0;
+    }
+    return -1;
+}
+
+/* Implement blob[start:stop:step] for reading.
+ *
+ * Return a new bytes object holding the selected bytes, or NULL with an
+ * exception set.  Any non-zero step is supported, including negative steps
+ * and strided slices that select nothing.
+ */
+static PyObject *
+subscript_slice(pysqlite_Blob *self, PyObject *item)
+{
+    Py_ssize_t start, stop, step, len;
+    if (get_slice_info(self, item, &start, &stop, &step, &len) < 0) {
+        return NULL;
+    }
+
+    if (step == 1) {
+        return inner_read(self, len, start);
+    }
+    if (len == 0) {
+        // PySlice_AdjustIndices() does not order start and stop for an empty
+        // strided slice, so there is no valid span to read from the blob.
+        return PyBytes_FromStringAndSize(NULL, 0);
+    }
+
+    // Read the smallest contiguous span covering every selected byte.  With
+    // a negative step 'stop - start' is negative, so derive the span from
+    // the first and last selected indices instead.
+    const Py_ssize_t last = start + (len - 1) * step;
+    const Py_ssize_t lo = (step > 0) ? start : last;
+    const Py_ssize_t hi = (step > 0) ? last : start;
+    PyObject *blob = inner_read(self, hi - lo + 1, lo);
+    if (blob == NULL) {
+        return NULL;
+    }
+    PyObject *result = PyBytes_FromStringAndSize(NULL, len);
+    if (result != NULL) {
+        const char *blob_buf = PyBytes_AS_STRING(blob);
+        char *res_buf = PyBytes_AS_STRING(result);
+        // 'j' walks the span buffer, starting at the first selected byte.
+        for (Py_ssize_t i = 0, j = start - lo; i < len; i++, j += step) {
+            res_buf[i] = blob_buf[j];
+        }
+    }
+    // Drop the temporary on the failure path too; it must not leak when the
+    // result allocation fails.
+    Py_DECREF(blob);
+    return result;
+}
+
+/* Mapping-protocol subscript slot: implements blob[item] for reading.
+ *
+ * Integer-like items are handled by subscript_index() and slice objects by
+ * subscript_slice(); anything else raises TypeError.  Return NULL with an
+ * exception set on failure, including when check_blob() rejects the blob.
+ */
+static PyObject *
+blob_subscript(pysqlite_Blob *self, PyObject *item)
+{
+    PyObject *result = NULL;
+
+    if (check_blob(self)) {
+        if (PyIndex_Check(item)) {
+            result = subscript_index(self, item);
+        }
+        else if (PySlice_Check(item)) {
+            result = subscript_slice(self, item);
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                            "Blob indices must be integers");
+        }
+    }
+    return result;
+}
+
+/* Implement 'blob[index] = value'.
+ *
+ * 'value' must export a simple buffer of exactly one byte, which is written
+ * at the offset selected by 'item'.  Deletion (value == NULL) is rejected
+ * with TypeError.  Return 0 on success, -1 with an exception set on failure.
+ */
+static int
+ass_subscript_index(pysqlite_Blob *self, PyObject *item, PyObject *value)
+{
+    if (value == NULL) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Blob doesn't support item deletion");
+        return -1;
+    }
+
+    const Py_ssize_t offset = get_subscript_index(self, item);
+    if (offset < 0) {
+        return -1;
+    }
+
+    Py_buffer view;
+    if (PyObject_GetBuffer(value, &view, PyBUF_SIMPLE) < 0) {
+        return -1;
+    }
+
+    int status;
+    if (view.len == 1) {
+        status = inner_write(self, (const char *)view.buf, 1, offset);
+    }
+    else {
+        PyErr_SetString(PyExc_ValueError, "Blob assignment must be a single byte");
+        status = -1;
+    }
+    // The buffer is released on both the success and the error path.
+    PyBuffer_Release(&view);
+    return status;
+}
+
+/* Implement 'blob[start:stop:step] = value'.
+ *
+ * 'value' must export a simple (contiguous) buffer holding exactly one byte
+ * per selected index; the blob is never resized.  Assigning to an empty
+ * slice is a no-op.  Deletion (value == NULL) is rejected with TypeError.
+ * Any non-zero step is supported, including negative steps.
+ *
+ * Return 0 on success, -1 with an exception set on failure.
+ */
+static int
+ass_subscript_slice(pysqlite_Blob *self, PyObject *item, PyObject *value)
+{
+    if (value == NULL) {
+        PyErr_SetString(PyExc_TypeError,
+                        "Blob doesn't support slice deletion");
+        return -1;
+    }
+
+    Py_ssize_t start, stop, step, len;
+    if (get_slice_info(self, item, &start, &stop, &step, &len) < 0) {
+        return -1;
+    }
+
+    if (len == 0) {
+        return 0;
+    }
+
+    Py_buffer vbuf;
+    if (PyObject_GetBuffer(value, &vbuf, PyBUF_SIMPLE) < 0) {
+        return -1;
+    }
+
+    int rc = -1;
+    if (vbuf.len != len) {
+        PyErr_SetString(PyExc_IndexError,
+                        "Blob slice assignment is wrong size");
+    }
+    else if (step == 1) {
+        rc = inner_write(self, vbuf.buf, len, start);
+    }
+    else {
+        // Strided assignment is a read-modify-write of the smallest
+        // contiguous span covering every selected byte.  With a negative
+        // step 'stop - start' is negative, so derive the span from the
+        // first and last selected indices instead.
+        const Py_ssize_t last = start + (len - 1) * step;
+        const Py_ssize_t lo = (step > 0) ? start : last;
+        const Py_ssize_t span = ((step > 0) ? last - start : start - last) + 1;
+        PyObject *blob_bytes = inner_read(self, span, lo);
+        if (blob_bytes != NULL) {
+            char *blob_buf = PyBytes_AS_STRING(blob_bytes);
+            const char *src = (const char *)vbuf.buf;
+            // 'j' walks the span buffer, starting at the first selected byte.
+            for (Py_ssize_t i = 0, j = start - lo; i < len; i++, j += step) {
+                blob_buf[j] = src[i];
+            }
+            rc = inner_write(self, blob_buf, span, lo);
+            Py_DECREF(blob_bytes);
+        }
+    }
+    PyBuffer_Release(&vbuf);
+    return rc;
+}
+
+/* Mapping-protocol assignment slot: implements 'blob[item] = value' and
+ * 'del blob[item]'.
+ *
+ * Integer-like items are handled by ass_subscript_index() and slice objects
+ * by ass_subscript_slice(); anything else raises TypeError.  Return 0 on
+ * success, -1 with an exception set on failure, including when check_blob()
+ * rejects the blob.
+ */
+static int
+blob_ass_subscript(pysqlite_Blob *self, PyObject *item, PyObject *value)
+{
+    int rc = -1;
+
+    if (check_blob(self)) {
+        if (PyIndex_Check(item)) {
+            rc = ass_subscript_index(self, item, value);
+        }
+        else if (PySlice_Check(item)) {
+            rc = ass_subscript_slice(self, item, value);
+        }
+        else {
+            PyErr_SetString(PyExc_TypeError,
+                            "Blob indices must be integers");
+        }
+    }
+    return rc;
+}
+
 
 static PyMethodDef blob_methods[] = {
     BLOB_CLOSE_METHODDEF
@@ -370,6 +562,11 @@ static PyType_Slot blob_slots[] = {
     {Py_tp_clear, blob_clear},
     {Py_tp_methods, blob_methods},
     {Py_tp_members, blob_members},
+
+    // Mapping protocol
+    {Py_mp_length, blob_length},
+    {Py_mp_subscript, blob_subscript},
+    {Py_mp_ass_subscript, blob_ass_subscript},
     {0, NULL},
 };
 



More information about the Python-checkins mailing list