[issue16201] socket.gethostbyname incorrectly parses ip

Charles-François Natali report at bugs.python.org
Thu Aug 29 16:13:59 CEST 2013


Charles-François Natali added the comment:

Here's a patch that uses inet_pton() if available, otherwise inet_aton()
if available, and otherwise falls back to getaddrinfo().
This should work on every platform, but if a platform has neither
inet_pton() nor inet_aton(), calling getaddrinfo() will incur some
overhead.
All Unices have either inet_aton() or inet_pton().
For Windows >= Vista, inet_pton() is available.
I'm not sure whether XP has one of them: if not, then we might keep
the hand-parsing as a fallback.

This adds support for numeric IPv6 addresses (if inet_pton() is
available), which notably speeds up sendto() on IPv6 sockets, and
gives a tiny speedup for IPv4 sockets:
before:
$ ./python -m timeit -s "import socket; s =
socket.socket(socket.AF_INET, socket.SOCK_DGRAM); DATA = b'x'"
"s.sendto(DATA, ('127.0.0.1', 4242))"
100000 loops, best of 3: 13.2 usec per loop
$ ./python -m timeit -s "import socket; s =
socket.socket(socket.AF_INET6, socket.SOCK_DGRAM); DATA = b'x'"
"s.sendto(DATA, ('::1', 4242))"
10000 loops, best of 3: 32.1 usec per loop
after:
$ ./python -m timeit -s "import socket; s =
socket.socket(socket.AF_INET, socket.SOCK_DGRAM); DATA = b'x'"
"s.sendto(DATA, ('127.0.0.1', 4242))"
100000 loops, best of 3: 11.5 usec per loop
$ ./python -m timeit -s "import socket; s =
socket.socket(socket.AF_INET6, socket.SOCK_DGRAM); DATA = b'x'"
"s.sendto(DATA, ('::1', 4242))"
100000 loops, best of 3: 12.8 usec per loop

Note that if the IPv6 address contains a scope ID ('%' suffix), then
we fall back to getaddrinfo(), which handles it properly (the scope ID
can be an interface index but also an interface name).

----------
Added file: http://bugs.python.org/file31506/parse_inet.diff

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue16201>
_______________________________________
-------------- next part --------------
diff -r 4b3238923b01 Lib/test/test_socket.py
--- a/Lib/test/test_socket.py	Fri May 10 19:57:44 2013 -0700
+++ b/Lib/test/test_socket.py	Thu Aug 29 16:10:40 2013 +0200
@@ -761,6 +761,31 @@
         if not fqhn in all_host_names:
             self.fail("Error testing host resolution mechanisms. (fqdn: %s, all: %s)" % (fqhn, repr(all_host_names)))
 
+    def test_hosts_resolution(self):
+        valid_hosts = [support.HOST]
+        valid_hosts_ipv6 = [support.HOSTv6]
+
+        invalid_hosts = ['0.1.1.1.', 'a1.1.1.1', '10.1a.1.1',
+                         '4294967306.4294967296.4294967296.1']
+        invalid_hosts_ipv6 = ['::1q', '::1::2', '1:1:1:1:1:1:1:1:1']
+
+        for host in valid_hosts:
+            self.assertEqual(socket.gethostbyname(host), host)
+
+        for host in invalid_hosts:
+            self.assertRaises(OSError, socket.gethostbyname, host)
+
+        # gethostbyaddr() supports IPv6, contrary to gethostbyname()
+        if support.IPV6_ENABLED:
+            valid_hosts.extend(valid_hosts_ipv6)
+            invalid_hosts.extend(invalid_hosts_ipv6)
+
+        for host in valid_hosts:
+            self.assertIn(host, socket.gethostbyaddr(host)[2])
+
+        for host in invalid_hosts:
+            self.assertRaises(OSError, socket.gethostbyaddr, host)
+
     @unittest.skipUnless(hasattr(socket, 'sethostname'), "test needs socket.sethostname()")
     @unittest.skipUnless(hasattr(socket, 'gethostname'), "test needs socket.gethostname()")
     def test_sethostname(self):
diff -r 4b3238923b01 Modules/socketmodule.c
--- a/Modules/socketmodule.c	Fri May 10 19:57:44 2013 -0700
+++ b/Modules/socketmodule.c	Thu Aug 29 16:10:40 2013 +0200
@@ -799,8 +799,6 @@
 {
     struct addrinfo hints, *res;
     int error;
-    int d1, d2, d3, d4;
-    char ch;
 
     memset((void *) addr_ret, '\0', sizeof(*addr_ret));
     if (name[0] == '\0') {
@@ -865,20 +863,49 @@
         sin->sin_addr.s_addr = INADDR_BROADCAST;
         return sizeof(sin->sin_addr);
     }
-    if (sscanf(name, "%d.%d.%d.%d%c", &d1, &d2, &d3, &d4, &ch) == 4 &&
-        0 <= d1 && d1 <= 255 && 0 <= d2 && d2 <= 255 &&
-        0 <= d3 && d3 <= 255 && 0 <= d4 && d4 <= 255) {
-        struct sockaddr_in *sin;
-        sin = (struct sockaddr_in *)addr_ret;
-        sin->sin_addr.s_addr = htonl(
-            ((long) d1 << 24) | ((long) d2 << 16) |
-            ((long) d3 << 8) | ((long) d4 << 0));
-        sin->sin_family = AF_INET;
+    /* avoid a name resolution in case of numeric address */
+#ifdef HAVE_INET_PTON
+    /* check for an IPv4 address */
+    if (af == AF_UNSPEC || af == AF_INET) {
+        struct sockaddr_in *sin = (struct sockaddr_in *)addr_ret;
+        if (inet_pton(AF_INET, name, &sin->sin_addr) > 0) {
+            sin->sin_family = AF_INET;
 #ifdef HAVE_SOCKADDR_SA_LEN
-        sin->sin_len = sizeof(*sin);
-#endif
-        return 4;
-    }
+            sin->sin_len = sizeof(*sin);
+#endif
+            return 4;
+        }
+    }
+#ifdef ENABLE_IPV6
+    /* check for an IPv6 address - if the address contains a scope ID, we
+     * fall back to getaddrinfo(), which can handle translation from an
+     * interface name to an interface index */
+    if ((af == AF_UNSPEC || af == AF_INET6) && !strchr(name, '%')) {
+        struct sockaddr_in6 *sin = (struct sockaddr_in6 *)addr_ret;
+        if (inet_pton(AF_INET6, name, &sin->sin6_addr) > 0) {
+            sin->sin6_family = AF_INET6;
+#ifdef HAVE_SOCKADDR_SA_LEN
+            sin->sin6_len = sizeof(*sin);
+#endif
+            return 16;
+        }
+    }
+#endif /* ENABLE_IPV6 */
+#elif defined(HAVE_INET_ATON)
+    /* check for an IPv4 address */
+     if (af == AF_INET || af == AF_UNSPEC) {
+        struct sockaddr_in *sin = (struct sockaddr_in *)addr_ret;
+        if (inet_aton(name, &sin->sin_addr) > 0) {
+            sin->sin_family = AF_INET;
+#ifdef HAVE_SOCKADDR_SA_LEN
+            sin->sin_len = sizeof(*sin);
+#endif
+            return 4;
+        }
+    }   
+#endif /* HAVE_INET_PTON */
+
+    /* perform a name resolution */
     memset(&hints, 0, sizeof(hints));
     hints.ai_family = af;
     Py_BEGIN_ALLOW_THREADS


More information about the Python-bugs-list mailing list