[Python-checkins] cpython: Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem

antoine.pitrou python-checkins at python.org
Fri Dec 16 14:47:06 CET 2011


http://hg.python.org/cpython/rev/1f23bb74f4bc
changeset:   73998:1f23bb74f4bc
user:        Antoine Pitrou <solipsis at pitrou.net>
date:        Fri Dec 16 14:46:36 2011 +0100
summary:
  Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem
encoding and the surrogateescape error handler, rather than UTF-8.  Patch
by David Watson.

files:
  Doc/library/socket.rst  |  18 +++++++-
  Lib/test/test_socket.py |  63 ++++++++++++++++++++++++++++-
  Misc/NEWS               |   4 +
  Modules/socketmodule.c  |  25 ++++++++--
  4 files changed, 101 insertions(+), 9 deletions(-)


diff --git a/Doc/library/socket.rst b/Doc/library/socket.rst
--- a/Doc/library/socket.rst
+++ b/Doc/library/socket.rst
@@ -40,9 +40,23 @@
 Depending on the system and the build options, various socket families
 are supported by this module.
 
-Socket addresses are represented as follows:
+The address format required by a particular socket object is automatically
+selected based on the address family specified when the socket object was
+created.  Socket addresses are represented as follows:
 
-- A single string is used for the :const:`AF_UNIX` address family.
+- The address of an :const:`AF_UNIX` socket bound to a file system node
+  is represented as a string, using the file system encoding and the
+  ``'surrogateescape'`` error handler (see :pep:`383`).  An address in
+  Linux's abstract namespace is returned as a :class:`bytes` object with
+  an initial null byte; note that sockets in this namespace can
+  communicate with normal file system sockets, so programs intended to
+  run on Linux may need to deal with both types of address.  A string or
+  :class:`bytes` object can be used for either type of address when
+  passing it as an argument.
+
+   .. versionchanged:: 3.3
+      Previously, :const:`AF_UNIX` socket paths were assumed to use UTF-8
+      encoding.
 
 - A pair ``(host, port)`` is used for the :const:`AF_INET` address family,
   where *host* is a string representing either a hostname in Internet domain
diff --git a/Lib/test/test_socket.py b/Lib/test/test_socket.py
--- a/Lib/test/test_socket.py
+++ b/Lib/test/test_socket.py
@@ -1538,7 +1538,6 @@
     def _testRecvFromNegative(self):
         self.cli.sendto(MSG, 0, (HOST, self.port))
 
-
 # Tests for the sendmsg()/recvmsg() interface.  Where possible, the
 # same test code is used with different families and types of socket
 # (e.g. stream, datagram), and tests using recvmsg() are repeated
@@ -4241,6 +4240,66 @@
         with socket.socket(socket.AF_UNIX, socket.SOCK_STREAM) as s:
             self.assertRaises(socket.error, s.bind, address)
 
+    def testStrName(self):
+        # Check that an abstract name can be passed as a string.
+        s = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        try:
+            s.bind("\x00python\x00test\x00")
+            self.assertEqual(s.getsockname(), b"\x00python\x00test\x00")
+        finally:
+            s.close()
+
+class TestUnixDomain(unittest.TestCase):
+
+    def setUp(self):
+        self.sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+
+    def tearDown(self):
+        self.sock.close()
+
+    def encoded(self, path):
+        # Return the given path encoded in the file system encoding,
+        # or skip the test if this is not possible.
+        try:
+            return os.fsencode(path)
+        except UnicodeEncodeError:
+            self.skipTest(
+                "Pathname {0!a} cannot be represented in file "
+                "system encoding {1!r}".format(
+                    path, sys.getfilesystemencoding()))
+
+    def testStrAddr(self):
+        # Test binding to and retrieving a normal string pathname.
+        path = os.path.abspath(support.TESTFN)
+        self.sock.bind(path)
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
+
+    def testBytesAddr(self):
+        # Test binding to a bytes pathname.
+        path = os.path.abspath(support.TESTFN)
+        self.sock.bind(self.encoded(path))
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
+
+    def testSurrogateescapeBind(self):
+        # Test binding to a valid non-ASCII pathname, with the
+        # non-ASCII bytes supplied using surrogateescape encoding.
+        path = os.path.abspath(support.TESTFN_UNICODE)
+        b = self.encoded(path)
+        self.sock.bind(b.decode("ascii", "surrogateescape"))
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
+
+    def testUnencodableAddr(self):
+        # Test binding to a pathname that cannot be encoded in the
+        # file system encoding.
+        if support.TESTFN_UNENCODABLE is None:
+            self.skipTest("No unencodable filename available")
+        path = os.path.abspath(support.TESTFN_UNENCODABLE)
+        self.sock.bind(path)
+        self.addCleanup(support.unlink, path)
+        self.assertEqual(self.sock.getsockname(), path)
 
 @unittest.skipUnless(thread, 'Threading required for this test.')
 class BufferIOTest(SocketConnectedTest):
@@ -4517,6 +4576,8 @@
     ])
     if hasattr(socket, "socketpair"):
         tests.append(BasicSocketPairTest)
+    if hasattr(socket, "AF_UNIX"):
+        tests.append(TestUnixDomain)
     if sys.platform == 'linux':
         tests.append(TestLinuxAbstractNamespace)
     if isTipcAvailable():
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -419,6 +419,10 @@
 Library
 -------
 
+- Issue #8373: The filesystem path of AF_UNIX sockets now uses the filesystem
+  encoding and the surrogateescape error handler, rather than UTF-8.  Patch
+  by David Watson.
+
 - Issue #10350: Read and save errno before calling a function which might
   overwrite it.  Original patch by Hallvard B Furuseth.
 
diff --git a/Modules/socketmodule.c b/Modules/socketmodule.c
--- a/Modules/socketmodule.c
+++ b/Modules/socketmodule.c
@@ -1073,7 +1073,7 @@
 #endif /* linux */
         {
             /* regular NULL-terminated string */
-            return PyUnicode_FromString(a->sun_path);
+            return PyUnicode_DecodeFSDefault(a->sun_path);
         }
     }
 #endif /* AF_UNIX */
@@ -1269,8 +1269,18 @@
         struct sockaddr_un* addr;
         char *path;
         int len;
-        if (!PyArg_Parse(args, "s#", &path, &len))
-            return 0;
+        int retval = 0;
+
+        /* PEP 383.  Not using PyUnicode_FSConverter since we need to
+           allow embedded nulls on Linux. */
+        if (PyUnicode_Check(args)) {
+            if ((args = PyUnicode_EncodeFSDefault(args)) == NULL)
+                return 0;
+        }
+        else
+            Py_INCREF(args);
+        if (!PyArg_Parse(args, "y#", &path, &len))
+            goto unix_out;
 
         addr = (struct sockaddr_un*)addr_ret;
 #ifdef linux
@@ -1279,7 +1289,7 @@
             if (len > sizeof addr->sun_path) {
                 PyErr_SetString(PyExc_OSError,
                                 "AF_UNIX path too long");
-                return 0;
+                goto unix_out;
             }
         }
         else
@@ -1289,7 +1299,7 @@
             if (len >= sizeof addr->sun_path) {
                 PyErr_SetString(PyExc_OSError,
                                 "AF_UNIX path too long");
-                return 0;
+                goto unix_out;
             }
             addr->sun_path[len] = 0;
         }
@@ -1300,7 +1310,10 @@
 #else
         *len_ret = len + offsetof(struct sockaddr_un, sun_path);
 #endif
-        return 1;
+        retval = 1;
+    unix_out:
+        Py_DECREF(args);
+        return retval;
     }
 #endif /* AF_UNIX */
 

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list