[Python-checkins] cpython: Issue #15061: Re-implemented hmac.compare_digest() in C

christian.heimes python-checkins at python.org
Sun Jun 24 13:49:00 CEST 2012


http://hg.python.org/cpython/rev/c82451eeb595
changeset:   77702:c82451eeb595
user:        Christian Heimes <christian at cheimes.de>
date:        Sun Jun 24 13:48:32 2012 +0200
summary:
  Issue #15061: Re-implemented hmac.compare_digest() in C

files:
  Doc/library/hmac.rst  |   16 ++-
  Lib/hmac.py           |   21 +----
  Lib/test/test_hmac.py |   81 ++++++++++++++++-
  Misc/NEWS             |    4 +
  Modules/operator.c    |  142 ++++++++++++++++++++++++++++++
  5 files changed, 234 insertions(+), 30 deletions(-)


diff --git a/Doc/library/hmac.rst b/Doc/library/hmac.rst
--- a/Doc/library/hmac.rst
+++ b/Doc/library/hmac.rst
@@ -73,7 +73,10 @@
 
    Returns the equivalent of ``a == b``, but avoids content based
    short circuiting behaviour to reduce the vulnerability to timing
-   analysis. The inputs must be :class:`bytes` instances.
+   analysis. The inputs must either both support the buffer protocol (e.g.
+   :class:`bytes` and :class:`bytearray` instances) or be ASCII only
+   :class:`str` instances as returned by :meth:`hexdigest`.
+   :class:`bytes` and :class:`str` instances can't be mixed.
 
    Using a short circuiting comparison (that is, one that terminates as soon
    as it finds any difference between the values) to check digests for
@@ -87,10 +90,13 @@
    .. note::
 
       While this function reduces the likelihood of leaking the contents of
-      the expected digest via a timing attack, it still uses short circuiting
-      behaviour based on the *length* of the inputs. It is assumed that the
-      expected length of the digest is not a secret, as it is typically
-      published as part of a file format, network protocol or API definition.
+      the expected digest via a timing attack, it still may leak some timing
+      information when the input values differ in lengths as well as in error
+      cases like unsupported types or non ASCII strings. When the inputs have
+      different length the timing depends solely on the length of ``b``. It
+      is assumed that the expected length of the digest is not a secret, as
+      it is typically published as part of a file format, network protocol
+      or API definition.
 
    .. versionadded:: 3.3
 
diff --git a/Lib/hmac.py b/Lib/hmac.py
--- a/Lib/hmac.py
+++ b/Lib/hmac.py
@@ -4,6 +4,7 @@
 """
 
 import warnings as _warnings
+from operator import _compare_digest as compare_digest
 
 trans_5C = bytes((x ^ 0x5C) for x in range(256))
 trans_36 = bytes((x ^ 0x36) for x in range(256))
@@ -13,26 +14,6 @@
 digest_size = None
 
 
-def compare_digest(a, b):
-    """Returns the equivalent of 'a == b', but avoids content based short
-    circuiting to reduce the vulnerability to timing attacks."""
-    # Consistent timing matters more here than data type flexibility
-    if not (isinstance(a, bytes) and isinstance(b, bytes)):
-        raise TypeError("inputs must be bytes instances")
-
-    # We assume the length of the expected digest is public knowledge,
-    # thus this early return isn't leaking anything an attacker wouldn't
-    # already know
-    if len(a) != len(b):
-        return False
-
-    # We assume that integers in the bytes range are all cached,
-    # thus timing shouldn't vary much due to integer object creation
-    result = 0
-    for x, y in zip(a, b):
-        result |= x ^ y
-    return result == 0
-
 
 class HMAC:
     """RFC 2104 HMAC class.  Also complies with RFC 4231.
diff --git a/Lib/test/test_hmac.py b/Lib/test/test_hmac.py
--- a/Lib/test/test_hmac.py
+++ b/Lib/test/test_hmac.py
@@ -304,7 +304,7 @@
 
 class CompareDigestTestCase(unittest.TestCase):
 
-    def test_compare(self):
+    def test_compare_digest(self):
         # Testing input type exception handling
         a, b = 100, 200
         self.assertRaises(TypeError, hmac.compare_digest, a, b)
@@ -316,10 +316,6 @@
         self.assertRaises(TypeError, hmac.compare_digest, a, b)
         a, b = b"foobar", "foobar"
         self.assertRaises(TypeError, hmac.compare_digest, a, b)
-        a, b = "foobar", "foobar"
-        self.assertRaises(TypeError, hmac.compare_digest, a, b)
-        a, b = bytearray(b"foobar"), bytearray(b"foobar")
-        self.assertRaises(TypeError, hmac.compare_digest, a, b)
 
         # Testing bytes of different lengths
         a, b = b"foobar", b"foo"
@@ -339,6 +335,81 @@
         a, b = b"\xde\xad\xbe\xef", b"\xde\xad\xbe\xef"
         self.assertTrue(hmac.compare_digest(a, b))
 
+        # Testing bytearrays of same lengths, same values
+        a, b = bytearray(b"foobar"), bytearray(b"foobar")
+        self.assertTrue(hmac.compare_digest(a, b))
+
+        # Testing bytearrays of diffeent lengths
+        a, b = bytearray(b"foobar"), bytearray(b"foo")
+        self.assertFalse(hmac.compare_digest(a, b))
+
+        # Testing bytearrays of same lengths, different values
+        a, b = bytearray(b"foobar"), bytearray(b"foobaz")
+        self.assertFalse(hmac.compare_digest(a, b))
+
+        # Testing byte and bytearray of same lengths, same values
+        a, b = bytearray(b"foobar"), b"foobar"
+        self.assertTrue(hmac.compare_digest(a, b))
+        self.assertTrue(hmac.compare_digest(b, a))
+
+        # Testing byte bytearray of diffeent lengths
+        a, b = bytearray(b"foobar"), b"foo"
+        self.assertFalse(hmac.compare_digest(a, b))
+        self.assertFalse(hmac.compare_digest(b, a))
+
+        # Testing byte and bytearray of same lengths, different values
+        a, b = bytearray(b"foobar"), b"foobaz"
+        self.assertFalse(hmac.compare_digest(a, b))
+        self.assertFalse(hmac.compare_digest(b, a))
+
+        # Testing str of same lengths
+        a, b = "foobar", "foobar"
+        self.assertTrue(hmac.compare_digest(a, b))
+
+        # Testing str of diffeent lengths
+        a, b = "foo", "foobar"
+        self.assertFalse(hmac.compare_digest(a, b))
+
+        # Testing bytes of same lengths, different values
+        a, b = "foobar", "foobaz"
+        self.assertFalse(hmac.compare_digest(a, b))
+
+        # Testing error cases
+        a, b = "foobar", b"foobar"
+        self.assertRaises(TypeError, hmac.compare_digest, a, b)
+        a, b = b"foobar", "foobar"
+        self.assertRaises(TypeError, hmac.compare_digest, a, b)
+        a, b = b"foobar", 1
+        self.assertRaises(TypeError, hmac.compare_digest, a, b)
+        a, b = 100, 200
+        self.assertRaises(TypeError, hmac.compare_digest, a, b)
+        a, b = "fooä", "fooä"
+        self.assertRaises(TypeError, hmac.compare_digest, a, b)
+
+        # subclasses are supported by ignore __eq__
+        class mystr(str):
+            def __eq__(self, other):
+                return False
+
+        a, b = mystr("foobar"), mystr("foobar")
+        self.assertTrue(hmac.compare_digest(a, b))
+        a, b = mystr("foobar"), "foobar"
+        self.assertTrue(hmac.compare_digest(a, b))
+        a, b = mystr("foobar"), mystr("foobaz")
+        self.assertFalse(hmac.compare_digest(a, b))
+
+        class mybytes(bytes):
+            def __eq__(self, other):
+                return False
+
+        a, b = mybytes(b"foobar"), mybytes(b"foobar")
+        self.assertTrue(hmac.compare_digest(a, b))
+        a, b = mybytes(b"foobar"), b"foobar"
+        self.assertTrue(hmac.compare_digest(a, b))
+        a, b = mybytes(b"foobar"), mybytes(b"foobaz")
+        self.assertFalse(hmac.compare_digest(a, b))
+
+
 def test_main():
     support.run_unittest(
         TestVectorsTestCase,
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #15061: Re-implemented hmac.compare_digest() in C to prevent further
+  timing analysis and to support all buffer protocol aware objects as well as
+  ASCII only str instances safely.
+
 - Issue #14815: Use Py_ssize_t instead of long for the object hash, to
   preserve all 64 bits of hash on Win64.
 
diff --git a/Modules/operator.c b/Modules/operator.c
--- a/Modules/operator.c
+++ b/Modules/operator.c
@@ -159,6 +159,146 @@
 #undef spam2
 #undef spam1o
 #undef spam1o
+
+/* compare_digest **********************************************************/
+
+/*
+ * timing safe compare
+ *
+ * Returns 1 of the strings are equal.
+ * In case of len(a) != len(b) the function tries to keep the timing
+ * dependent on the length of b. CPU cache locally may still alter timing
+ * a bit.
+ */
+static int
+_tscmp(const unsigned char *a, const unsigned char *b,
+        Py_ssize_t len_a, Py_ssize_t len_b)
+{
+    /* The volatile type declarations make sure that the compiler has no
+     * chance to optimize and fold the code in any way that may change
+     * the timing.
+     */
+    volatile Py_ssize_t length;
+    volatile const unsigned char *left;
+    volatile const unsigned char *right;
+    Py_ssize_t i;
+    unsigned char result;
+
+    /* loop count depends on length of b */
+    length = len_b;
+    left = NULL;
+    right = b;
+
+    /* don't use else here to keep the amount of CPU instructions constant,
+     * volatile forces re-evaluation
+     *  */
+    if (len_a == length) {
+        left = *((volatile const unsigned char**)&a);
+        result = 0;
+    }
+    if (len_a != length) {
+        left = b;
+        result = 1;
+    }
+
+    for (i=0; i < length; i++) {
+        result |= *left++ ^ *right++;
+    }
+
+    return (result == 0);
+}
+
+PyDoc_STRVAR(compare_digest__doc__,
+"compare_digest(a, b) -> bool\n"
+"\n"
+"Return the equivalent of 'a == b', but avoid any short circuiting to\n"
+"counterfeit timing analysis of input data. The function should be used to\n"
+"compare cryptographic secrets. a and b must both either support the buffer\n"
+"protocol (e.g. bytes) or be ASCII only str instances at the same time.\n"
+"\n"
+"Note: In case of an error or different lengths the function may disclose\n"
+"some timing information about the types and lengths of a and b.\n");
+
+
+static PyObject*
+compare_digest(PyObject *self, PyObject *args)
+{
+    PyObject *a, *b;
+    int rc;
+    PyObject *result;
+
+    if (!PyArg_ParseTuple(args, "OO:compare_digest", &a, &b)) {
+        return NULL;
+    }
+
+    /* ASCII unicode string */
+    if(PyUnicode_Check(a) && PyUnicode_Check(b)) {
+        if (PyUnicode_READY(a) == -1 || PyUnicode_READY(b) == -1) {
+            return NULL;
+        }
+        if (!PyUnicode_IS_ASCII(a) || !PyUnicode_IS_ASCII(b)) {
+            PyErr_SetString(PyExc_TypeError,
+                            "comparing strings with non-ASCII characters is "
+                            "not supported");
+            return NULL;
+        }
+
+        rc = _tscmp(PyUnicode_DATA(a),
+                    PyUnicode_DATA(b),
+                    PyUnicode_GET_LENGTH(a),
+                    PyUnicode_GET_LENGTH(b));
+    }
+    /* fallback to buffer interface for bytes, bytesarray and other */
+    else {
+        Py_buffer view_a;
+        Py_buffer view_b;
+
+        if ((PyObject_CheckBuffer(a) == 0) & (PyObject_CheckBuffer(b) == 0)) {
+            PyErr_Format(PyExc_TypeError,
+                         "unsupported operand types(s) or combination of types: "
+                         "'%.100s' and '%.100s'",
+                         Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
+            return NULL;
+        }
+
+        if (PyObject_GetBuffer(a, &view_a, PyBUF_SIMPLE) == -1) {
+            return NULL;
+        }
+        if (view_a.ndim > 1) {
+            PyErr_SetString(PyExc_BufferError,
+                            "Buffer must be single dimension");
+            PyBuffer_Release(&view_a);
+            return NULL;
+        }
+
+        if (PyObject_GetBuffer(b, &view_b, PyBUF_SIMPLE) == -1) {
+            PyBuffer_Release(&view_a);
+            return NULL;
+        }
+        if (view_b.ndim > 1) {
+            PyErr_SetString(PyExc_BufferError,
+                            "Buffer must be single dimension");
+            PyBuffer_Release(&view_a);
+            PyBuffer_Release(&view_b);
+            return NULL;
+        }
+
+        rc = _tscmp((const unsigned char*)view_a.buf,
+                    (const unsigned char*)view_b.buf,
+                    view_a.len,
+                    view_b.len);
+
+        PyBuffer_Release(&view_a);
+        PyBuffer_Release(&view_b);
+    }
+
+    result = PyBool_FromLong(rc);
+    Py_INCREF(result);
+    return result;
+}
+
+/* operator methods **********************************************************/
+
 #define spam1(OP,DOC) {#OP, OP, METH_VARARGS, PyDoc_STR(DOC)},
 #define spam2(OP,ALTOP,DOC) {#OP, op_##OP, METH_VARARGS, PyDoc_STR(DOC)}, \
                            {#ALTOP, op_##OP, METH_VARARGS, PyDoc_STR(DOC)},
@@ -227,6 +367,8 @@
 spam2(gt,__gt__, "gt(a, b) -- Same as a>b.")
 spam2(ge,__ge__, "ge(a, b) -- Same as a>=b.")
 
+    {"_compare_digest", (PyCFunction)compare_digest, METH_VARARGS,
+     compare_digest__doc__},
     {NULL,              NULL}           /* sentinel */
 
 };

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list