[Python-checkins] bpo-32922: dbm.open() now encodes filename with the filesystem encoding. (GH-5832)

Serhiy Storchaka webhook-mailer at python.org
Mon Feb 26 09:02:31 EST 2018


https://github.com/python/cpython/commit/6f600ff1734ca2fdcdd37a809adf8130f0d8cc4e
commit: 6f600ff1734ca2fdcdd37a809adf8130f0d8cc4e
branch: master
author: Serhiy Storchaka <storchaka at gmail.com>
committer: GitHub <noreply at github.com>
date: 2018-02-26T16:02:22+02:00
summary:

bpo-32922: dbm.open() now encodes filename with the filesystem encoding. (GH-5832)

files:
A Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst
M Lib/test/test_dbm_dumb.py
M Lib/test/test_dbm_gnu.py
M Lib/test/test_dbm_ndbm.py
M Modules/_dbmmodule.c
M Modules/_gdbmmodule.c
M Modules/clinic/_dbmmodule.c.h
M Modules/clinic/_gdbmmodule.c.h

diff --git a/Lib/test/test_dbm_dumb.py b/Lib/test/test_dbm_dumb.py
index 21f29af05d28..652a355d990b 100644
--- a/Lib/test/test_dbm_dumb.py
+++ b/Lib/test/test_dbm_dumb.py
@@ -281,6 +281,21 @@ def test_readonly_files(self):
                 self.assertEqual(sorted(f.keys()), sorted(self._dict))
                 f.close()  # don't write
 
+    @unittest.skipUnless(support.TESTFN_NONASCII,
+                         'requires OS support of non-ASCII encodings')
+    def test_nonascii_filename(self):
+        filename = support.TESTFN_NONASCII
+        for suffix in ['.dir', '.dat', '.bak']:
+            self.addCleanup(support.unlink, filename + suffix)
+        with dumbdbm.open(filename, 'c') as db:
+            db[b'key'] = b'value'
+        self.assertTrue(os.path.exists(filename + '.dat'))
+        self.assertTrue(os.path.exists(filename + '.dir'))
+        with dumbdbm.open(filename, 'r') as db:
+            self.assertEqual(list(db.keys()), [b'key'])
+            self.assertTrue(b'key' in db)
+            self.assertEqual(db[b'key'], b'value')
+
     def tearDown(self):
         _delete_files()
 
diff --git a/Lib/test/test_dbm_gnu.py b/Lib/test/test_dbm_gnu.py
index 304b33286978..d96df9284806 100644
--- a/Lib/test/test_dbm_gnu.py
+++ b/Lib/test/test_dbm_gnu.py
@@ -2,7 +2,7 @@
 gdbm = support.import_module("dbm.gnu") #skip if not supported
 import unittest
 import os
-from test.support import TESTFN, unlink
+from test.support import TESTFN, TESTFN_NONASCII, unlink
 
 
 filename = TESTFN
@@ -93,5 +93,39 @@ def test_context_manager(self):
         self.assertEqual(str(cm.exception),
                          "GDBM object has already been closed")
 
+    def test_bytes(self):
+        with gdbm.open(filename, 'c') as db:
+            db[b'bytes key \xbd'] = b'bytes value \xbd'
+        with gdbm.open(filename, 'r') as db:
+            self.assertEqual(list(db.keys()), [b'bytes key \xbd'])
+            self.assertTrue(b'bytes key \xbd' in db)
+            self.assertEqual(db[b'bytes key \xbd'], b'bytes value \xbd')
+
+    def test_unicode(self):
+        with gdbm.open(filename, 'c') as db:
+            db['Unicode key \U0001f40d'] = 'Unicode value \U0001f40d'
+        with gdbm.open(filename, 'r') as db:
+            self.assertEqual(list(db.keys()), ['Unicode key \U0001f40d'.encode()])
+            self.assertTrue('Unicode key \U0001f40d'.encode() in db)
+            self.assertTrue('Unicode key \U0001f40d' in db)
+            self.assertEqual(db['Unicode key \U0001f40d'.encode()],
+                             'Unicode value \U0001f40d'.encode())
+            self.assertEqual(db['Unicode key \U0001f40d'],
+                             'Unicode value \U0001f40d'.encode())
+
+    @unittest.skipUnless(TESTFN_NONASCII,
+                         'requires OS support of non-ASCII encodings')
+    def test_nonascii_filename(self):
+        filename = TESTFN_NONASCII
+        self.addCleanup(unlink, filename)
+        with gdbm.open(filename, 'c') as db:
+            db[b'key'] = b'value'
+        self.assertTrue(os.path.exists(filename))
+        with gdbm.open(filename, 'r') as db:
+            self.assertEqual(list(db.keys()), [b'key'])
+            self.assertTrue(b'key' in db)
+            self.assertEqual(db[b'key'], b'value')
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/Lib/test/test_dbm_ndbm.py b/Lib/test/test_dbm_ndbm.py
index 49f4426e4cb9..fb7d0e8281e3 100644
--- a/Lib/test/test_dbm_ndbm.py
+++ b/Lib/test/test_dbm_ndbm.py
@@ -1,5 +1,6 @@
 from test import support
 support.import_module("dbm.ndbm") #skip if not supported
+import os
 import unittest
 import dbm.ndbm
 from dbm.ndbm import error
@@ -47,6 +48,42 @@ def test_context_manager(self):
         self.assertEqual(str(cm.exception),
                          "DBM object has already been closed")
 
+    def test_bytes(self):
+        with dbm.ndbm.open(self.filename, 'c') as db:
+            db[b'bytes key \xbd'] = b'bytes value \xbd'
+        with dbm.ndbm.open(self.filename, 'r') as db:
+            self.assertEqual(list(db.keys()), [b'bytes key \xbd'])
+            self.assertTrue(b'bytes key \xbd' in db)
+            self.assertEqual(db[b'bytes key \xbd'], b'bytes value \xbd')
+
+    def test_unicode(self):
+        with dbm.ndbm.open(self.filename, 'c') as db:
+            db['Unicode key \U0001f40d'] = 'Unicode value \U0001f40d'
+        with dbm.ndbm.open(self.filename, 'r') as db:
+            self.assertEqual(list(db.keys()), ['Unicode key \U0001f40d'.encode()])
+            self.assertTrue('Unicode key \U0001f40d'.encode() in db)
+            self.assertTrue('Unicode key \U0001f40d' in db)
+            self.assertEqual(db['Unicode key \U0001f40d'.encode()],
+                             'Unicode value \U0001f40d'.encode())
+            self.assertEqual(db['Unicode key \U0001f40d'],
+                             'Unicode value \U0001f40d'.encode())
+
+    @unittest.skipUnless(support.TESTFN_NONASCII,
+                         'requires OS support of non-ASCII encodings')
+    def test_nonascii_filename(self):
+        filename = support.TESTFN_NONASCII
+        for suffix in ['', '.pag', '.dir', '.db']:
+            self.addCleanup(support.unlink, filename + suffix)
+        with dbm.ndbm.open(filename, 'c') as db:
+            db[b'key'] = b'value'
+        self.assertTrue(any(os.path.exists(filename + suffix)
+                            for suffix in ['', '.pag', '.dir', '.db']))
+        with dbm.ndbm.open(filename, 'r') as db:
+            self.assertEqual(list(db.keys()), [b'key'])
+            self.assertTrue(b'key' in db)
+            self.assertEqual(db[b'key'], b'value')
+
+
 
 if __name__ == '__main__':
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst b/Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst
new file mode 100644
index 000000000000..412e588586c0
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2018-02-23-19-12-04.bpo-32922.u-xe0B.rst
@@ -0,0 +1,2 @@
+dbm.open() now encodes filename with the filesystem encoding rather than
+default encoding.
diff --git a/Modules/_dbmmodule.c b/Modules/_dbmmodule.c
index 7e1344177b5d..8afd92cf3ca8 100644
--- a/Modules/_dbmmodule.c
+++ b/Modules/_dbmmodule.c
@@ -412,7 +412,7 @@ static PyTypeObject Dbmtype = {
 
 _dbm.open as dbmopen
 
-    filename: str
+    filename: unicode
         The filename to open.
 
     flags: str="r"
@@ -429,9 +429,9 @@ Return a database object.
 [clinic start generated code]*/
 
 static PyObject *
-dbmopen_impl(PyObject *module, const char *filename, const char *flags,
+dbmopen_impl(PyObject *module, PyObject *filename, const char *flags,
              int mode)
-/*[clinic end generated code: output=5fade8cf16e0755f input=226334bade5764e6]*/
+/*[clinic end generated code: output=9527750f5df90764 input=376a9d903a50df59]*/
 {
     int iflags;
 
@@ -450,7 +450,20 @@ dbmopen_impl(PyObject *module, const char *filename, const char *flags,
                         "arg 2 to open should be 'r', 'w', 'c', or 'n'");
         return NULL;
     }
-    return newdbmobject(filename, iflags, mode);
+
+    PyObject *filenamebytes = PyUnicode_EncodeFSDefault(filename);
+    if (filenamebytes == NULL) {
+        return NULL;
+    }
+    const char *name = PyBytes_AS_STRING(filenamebytes);
+    if (strlen(name) != (size_t)PyBytes_GET_SIZE(filenamebytes)) {
+        Py_DECREF(filenamebytes);
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        return NULL;
+    }
+    PyObject *self = newdbmobject(name, iflags, mode);
+    Py_DECREF(filenamebytes);
+    return self;
 }
 
 static PyMethodDef dbmmodule_methods[] = {
diff --git a/Modules/_gdbmmodule.c b/Modules/_gdbmmodule.c
index 12d973b5cee3..9996d8c26fb7 100644
--- a/Modules/_gdbmmodule.c
+++ b/Modules/_gdbmmodule.c
@@ -527,7 +527,7 @@ static PyTypeObject Dbmtype = {
 
 /*[clinic input]
 _gdbm.open as dbmopen
-    filename as name: str
+    filename: unicode
     flags: str="r"
     mode: int(py_default="0o666") = 0o666
     /
@@ -557,8 +557,9 @@ when the database has to be created.  It defaults to octal 0o666.
 [clinic start generated code]*/
 
 static PyObject *
-dbmopen_impl(PyObject *module, const char *name, const char *flags, int mode)
-/*[clinic end generated code: output=31aa1bafdf5da688 input=55563cd60e51984a]*/
+dbmopen_impl(PyObject *module, PyObject *filename, const char *flags,
+             int mode)
+/*[clinic end generated code: output=9527750f5df90764 input=3be0b0875974b928]*/
 {
     int iflags;
 
@@ -606,7 +607,19 @@ dbmopen_impl(PyObject *module, const char *name, const char *flags, int mode)
         }
     }
 
-    return newdbmobject(name, iflags, mode);
+    PyObject *filenamebytes = PyUnicode_EncodeFSDefault(filename);
+    if (filenamebytes == NULL) {
+        return NULL;
+    }
+    const char *name = PyBytes_AS_STRING(filenamebytes);
+    if (strlen(name) != (size_t)PyBytes_GET_SIZE(filenamebytes)) {
+        Py_DECREF(filenamebytes);
+        PyErr_SetString(PyExc_ValueError, "embedded null character");
+        return NULL;
+    }
+    PyObject *self = newdbmobject(name, iflags, mode);
+    Py_DECREF(filenamebytes);
+    return self;
 }
 
 static const char dbmmodule_open_flags[] = "rwcn"
diff --git a/Modules/clinic/_dbmmodule.c.h b/Modules/clinic/_dbmmodule.c.h
index 63d5b1a41fbf..0f831c9eec72 100644
--- a/Modules/clinic/_dbmmodule.c.h
+++ b/Modules/clinic/_dbmmodule.c.h
@@ -121,18 +121,18 @@ PyDoc_STRVAR(dbmopen__doc__,
     {"open", (PyCFunction)dbmopen, METH_FASTCALL, dbmopen__doc__},
 
 static PyObject *
-dbmopen_impl(PyObject *module, const char *filename, const char *flags,
+dbmopen_impl(PyObject *module, PyObject *filename, const char *flags,
              int mode);
 
 static PyObject *
 dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
-    const char *filename;
+    PyObject *filename;
     const char *flags = "r";
     int mode = 438;
 
-    if (!_PyArg_ParseStack(args, nargs, "s|si:open",
+    if (!_PyArg_ParseStack(args, nargs, "U|si:open",
         &filename, &flags, &mode)) {
         goto exit;
     }
@@ -141,4 +141,4 @@ dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=8ce71abac849155f input=a9049054013a1b77]*/
+/*[clinic end generated code: output=5c858b4080a011a4 input=a9049054013a1b77]*/
diff --git a/Modules/clinic/_gdbmmodule.c.h b/Modules/clinic/_gdbmmodule.c.h
index 2222967aaa50..7bdc4321df29 100644
--- a/Modules/clinic/_gdbmmodule.c.h
+++ b/Modules/clinic/_gdbmmodule.c.h
@@ -234,23 +234,24 @@ PyDoc_STRVAR(dbmopen__doc__,
     {"open", (PyCFunction)dbmopen, METH_FASTCALL, dbmopen__doc__},
 
 static PyObject *
-dbmopen_impl(PyObject *module, const char *name, const char *flags, int mode);
+dbmopen_impl(PyObject *module, PyObject *filename, const char *flags,
+             int mode);
 
 static PyObject *
 dbmopen(PyObject *module, PyObject *const *args, Py_ssize_t nargs)
 {
     PyObject *return_value = NULL;
-    const char *name;
+    PyObject *filename;
     const char *flags = "r";
     int mode = 438;
 
-    if (!_PyArg_ParseStack(args, nargs, "s|si:open",
-        &name, &flags, &mode)) {
+    if (!_PyArg_ParseStack(args, nargs, "U|si:open",
+        &filename, &flags, &mode)) {
         goto exit;
     }
-    return_value = dbmopen_impl(module, name, flags, mode);
+    return_value = dbmopen_impl(module, filename, flags, mode);
 
 exit:
     return return_value;
 }
-/*[clinic end generated code: output=dc0aca8c00055d02 input=a9049054013a1b77]*/
+/*[clinic end generated code: output=dec05ff9c5aeaeae input=a9049054013a1b77]*/



More information about the Python-checkins mailing list