[Python-checkins] bpo-32147: Improved performance of binascii.unhexlify(). (GH-4586)

Serhiy Storchaka webhook-mailer at python.org
Mon Feb 26 15:35:48 EST 2018


https://github.com/python/cpython/commit/6b5df906afe113dbe421d044322254cfd4747c9c
commit: 6b5df906afe113dbe421d044322254cfd4747c9c
branch: master
author: Sergey Fedoseev <fedoseev.sergey at gmail.com>
committer: Serhiy Storchaka <storchaka at gmail.com>
date: 2018-02-26T22:35:41+02:00
summary:

bpo-32147: Improved performance of binascii.unhexlify(). (GH-4586)

files:
A Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst
M Lib/test/test_binascii.py
M Misc/ACKS
M Modules/binascii.c

diff --git a/Lib/test/test_binascii.py b/Lib/test/test_binascii.py
index 8fa57cdf1b0b..0997d9432bf6 100644
--- a/Lib/test/test_binascii.py
+++ b/Lib/test/test_binascii.py
@@ -198,6 +198,11 @@ def test_hex(self):
         self.assertEqual(s, u)
         self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1])
         self.assertRaises(binascii.Error, binascii.a2b_hex, t[:-1] + b'q')
+        self.assertRaises(binascii.Error, binascii.a2b_hex, bytes([255, 255]))
+        self.assertRaises(binascii.Error, binascii.a2b_hex, b'0G')
+        self.assertRaises(binascii.Error, binascii.a2b_hex, b'0g')
+        self.assertRaises(binascii.Error, binascii.a2b_hex, b'G0')
+        self.assertRaises(binascii.Error, binascii.a2b_hex, b'g0')
 
         # Confirm that b2a_hex == hexlify and a2b_hex == unhexlify
         self.assertEqual(binascii.hexlify(self.type2test(s)), t)
diff --git a/Misc/ACKS b/Misc/ACKS
index dee022f2ff0a..e2addfc210b7 100644
--- a/Misc/ACKS
+++ b/Misc/ACKS
@@ -460,6 +460,7 @@ Michael Farrell
 Troy J. Farrell
 Jim Fasarakis-Hilliard
 Mark Favas
+Sergey Fedoseev
 Boris Feld
 Thomas Fenzl
 Niels Ferguson
diff --git a/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst b/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst
new file mode 100644
index 000000000000..e02a97c5e9e6
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2017-11-28-10-23-13.bpo-32147.PI2k1Y.rst
@@ -0,0 +1,2 @@
+:func:`binascii.unhexlify` is now up to 2 times faster.
+Patch by Sergey Fedoseev.
diff --git a/Modules/binascii.c b/Modules/binascii.c
index 1af6b7f98f25..59e99282ae35 100644
--- a/Modules/binascii.c
+++ b/Modules/binascii.c
@@ -1130,21 +1130,6 @@ binascii_hexlify_impl(PyObject *module, Py_buffer *data)
     return _Py_strhex_bytes((const char *)data->buf, data->len);
 }
 
-static int
-to_int(int c)
-{
-    if (Py_ISDIGIT(c))
-        return c - '0';
-    else {
-        if (Py_ISUPPER(c))
-            c = Py_TOLOWER(c);
-        if (c >= 'a' && c <= 'f')
-            return c - 'a' + 10;
-    }
-    return -1;
-}
-
-
 /*[clinic input]
 binascii.a2b_hex
 
@@ -1187,9 +1172,9 @@ binascii_a2b_hex_impl(PyObject *module, Py_buffer *hexstr)
     retbuf = PyBytes_AS_STRING(retval);
 
     for (i=j=0; i < arglen; i += 2) {
-        int top = to_int(Py_CHARMASK(argbuf[i]));
-        int bot = to_int(Py_CHARMASK(argbuf[i+1]));
-        if (top == -1 || bot == -1) {
+        unsigned int top = _PyLong_DigitValue[Py_CHARMASK(argbuf[i])];
+        unsigned int bot = _PyLong_DigitValue[Py_CHARMASK(argbuf[i+1])];
+        if (top >= 16 || bot >= 16) {
             PyErr_SetString(Error,
                             "Non-hexadecimal digit found");
             goto finally;
@@ -1218,19 +1203,6 @@ binascii_unhexlify_impl(PyObject *module, Py_buffer *hexstr)
     return binascii_a2b_hex_impl(module, hexstr);
 }
 
-static const int table_hex[128] = {
-  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
-  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
-  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
-};
-
-#define hexval(c) table_hex[(unsigned int)(c)]
-
 #define MAXLINESIZE 76
 
 
@@ -1293,9 +1265,9 @@ binascii_a2b_qp_impl(PyObject *module, Py_buffer *data, int header)
                       (ascii_data[in+1] >= 'a' && ascii_data[in+1] <= 'f') ||
                       (ascii_data[in+1] >= '0' && ascii_data[in+1] <= '9'))) {
                 /* hexval */
-                ch = hexval(ascii_data[in]) << 4;
+                ch = _PyLong_DigitValue[ascii_data[in]] << 4;
                 in++;
-                ch |= hexval(ascii_data[in]);
+                ch |= _PyLong_DigitValue[ascii_data[in]];
                 in++;
                 odata[out++] = ch;
             }



More information about the Python-checkins mailing list