[Python-checkins] cpython: Issue #18582: Add 'pbkdf2_hmac' to the hashlib module.

christian.heimes python-checkins at python.org
Sun Oct 13 00:52:54 CEST 2013


http://hg.python.org/cpython/rev/5fd56d6d3fce
changeset:   86264:5fd56d6d3fce
user:        Christian Heimes <christian at cheimes.de>
date:        Sun Oct 13 00:52:43 2013 +0200
summary:
  Issue #18582: Add 'pbkdf2_hmac' to the hashlib module.

files:
  Doc/library/hashlib.rst  |   43 ++++++++
  Doc/whatsnew/3.4.rst     |    8 +
  Lib/hashlib.py           |    8 +
  Lib/test/test_hashlib.py |   82 ++++++++++++++++
  Misc/NEWS                |    3 +
  Modules/_hashopenssl.c   |  136 +++++++++++++++++++++++++++
  6 files changed, 280 insertions(+), 0 deletions(-)


diff --git a/Doc/library/hashlib.rst b/Doc/library/hashlib.rst
--- a/Doc/library/hashlib.rst
+++ b/Doc/library/hashlib.rst
@@ -32,6 +32,10 @@
    Some algorithms have known hash collision weaknesses, refer to the "See
    also" section at the end.
 
+
+Hash algorithms
+---------------
+
 There is one constructor method named for each type of :dfn:`hash`.  All return
 a hash object with the same simple interface. For example: use :func:`sha1` to
 create a SHA1 hash object. You can now feed this object with :term:`bytes-like
@@ -174,6 +178,43 @@
    compute the digests of data sharing a common initial substring.
 
 
+Key Derivation Function
+-----------------------
+
+Key derivation and key stretching algorithms are designed for secure password
+hashing. Naive algorithms such as ``sha1(password)`` are not resistant
+against brute-force attacks. A good password hashing function must be tunable,
+slow and include a salt.
+
+
+.. function:: pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None)
+
+   The function provides PKCS#5 password-based key derivation function 2. It
+   uses HMAC as pseudorandom function.
+
+   The string *hash_name* is the desired name of the hash digest algorithm
+   for HMAC, e.g. 'sha1' or 'sha256'. *password* and *salt* are interpreted
+   as buffers of bytes. Applications and libraries should limit *password*
+   to a sensible length (e.g. 1024). *salt* should be about 16 or more bytes
+   from a proper source, e.g. :func:`os.urandom`.
+
+   The number of *iterations* should be chosen based on the hash algorithm
+   and computing power. As of 2013, at least 100,000 iterations of SHA-256
+   are suggested.
+
+   *dklen* is the length of the derived key. If *dklen* is ``None`` then the
+   digest size of the hash algorithm *hash_name* is used, e.g. 64 for SHA-512.
+
+   >>> import hashlib, binascii
+   >>> dk = hashlib.pbkdf2_hmac('sha256', b'password', b'salt', 100000)
+   >>> binascii.hexlify(dk)
+   b'0394a2ede332c9a13eb82e9b24631604c31df978b4e2f0fbd2c549944f9d79a5'
+
+   .. versionadded:: 3.4
+
+   .. note:: *pbkdf2_hmac* is only available with OpenSSL 1.0 and newer.
+
+
 .. seealso::
 
    Module :mod:`hmac`
@@ -189,3 +230,5 @@
       Wikipedia article with information on which algorithms have known issues and
       what that means regarding their use.
 
+   http://www.ietf.org/rfc/rfc2898.txt
+      PKCS #5: Password-Based Cryptography Specification Version 2.0
diff --git a/Doc/whatsnew/3.4.rst b/Doc/whatsnew/3.4.rst
--- a/Doc/whatsnew/3.4.rst
+++ b/Doc/whatsnew/3.4.rst
@@ -261,6 +261,14 @@
 New :func:`functools.singledispatch` decorator: see the :pep:`443`.
 
 
+hashlib
+-------
+
+New :func:`hashlib.pbkdf2_hmac` function.
+
+(Contributed by Christian Heimes in :issue:`18582`)
+
+
 inspect
 -------
 
diff --git a/Lib/hashlib.py b/Lib/hashlib.py
--- a/Lib/hashlib.py
+++ b/Lib/hashlib.py
@@ -147,6 +147,14 @@
     new = __py_new
     __get_hash = __get_builtin_constructor
 
+# PBKDF2 requires OpenSSL 1.0+ with HMAC and SHA
+try:
+    from _hashlib import pbkdf2_hmac
+except ImportError:
+    pass
+else:
+    __all__ += ('pbkdf2_hmac',)
+
 for __func_name in __always_supported:
     # try them all, some may not work due to the OpenSSL
     # version not supporting that algorithm.
diff --git a/Lib/test/test_hashlib.py b/Lib/test/test_hashlib.py
--- a/Lib/test/test_hashlib.py
+++ b/Lib/test/test_hashlib.py
@@ -545,6 +545,88 @@
 
         self.assertEqual(expected_hash, hasher.hexdigest())
 
+    pbkdf2_test_vectors = [
+        (b'password', b'salt', 1, None),
+        (b'password', b'salt', 2, None),
+        (b'password', b'salt', 4096, None),
+        # too slow, it takes over a minute on a fast CPU.
+        #(b'password', b'salt', 16777216, None),
+        (b'passwordPASSWORDpassword', b'saltSALTsaltSALTsaltSALTsaltSALTsalt',
+         4096, -1),
+        (b'pass\0word', b'sa\0lt', 4096, 16),
+    ]
+
+    pbkdf2_results = {
+        "sha1": [
+            # official test vectors from RFC 6070
+            (bytes.fromhex('0c60c80f961f0e71f3a9b524af6012062fe037a6'), None),
+            (bytes.fromhex('ea6c014dc72d6f8ccd1ed92ace1d41f0d8de8957'), None),
+            (bytes.fromhex('4b007901b765489abead49d926f721d065a429c1'), None),
+            #(bytes.fromhex('eefe3d61cd4da4e4e9945b3d6ba2158c2634e984'), None),
+            (bytes.fromhex('3d2eec4fe41c849b80c8d83662c0e44a8b291a964c'
+                           'f2f07038'), 25),
+            (bytes.fromhex('56fa6aa75548099dcc37d7f03425e0c3'), None),],
+        "sha256": [
+            (bytes.fromhex('120fb6cffcf8b32c43e7225256c4f837'
+                           'a86548c92ccc35480805987cb70be17b'), None),
+            (bytes.fromhex('ae4d0c95af6b46d32d0adff928f06dd0'
+                           '2a303f8ef3c251dfd6e2d85a95474c43'), None),
+            (bytes.fromhex('c5e478d59288c841aa530db6845c4c8d'
+                           '962893a001ce4e11a4963873aa98134a'), None),
+            #(bytes.fromhex('cf81c66fe8cfc04d1f31ecb65dab4089'
+            #               'f7f179e89b3b0bcb17ad10e3ac6eba46'), None),
+            (bytes.fromhex('348c89dbcbd32b2f32d814b8116e84cf2b17'
+                           '347ebc1800181c4e2a1fb8dd53e1c635518c7dac47e9'), 40),
+            (bytes.fromhex('89b69d0516f829893c696226650a8687'), None),],
+        "sha512": [
+            (bytes.fromhex('867f70cf1ade02cff3752599a3a53dc4af34c7a669815ae5'
+                           'd513554e1c8cf252c02d470a285a0501bad999bfe943c08f'
+                           '050235d7d68b1da55e63f73b60a57fce'), None),
+            (bytes.fromhex('e1d9c16aa681708a45f5c7c4e215ceb66e011a2e9f004071'
+                           '3f18aefdb866d53cf76cab2868a39b9f7840edce4fef5a82'
+                           'be67335c77a6068e04112754f27ccf4e'), None),
+            (bytes.fromhex('d197b1b33db0143e018b12f3d1d1479e6cdebdcc97c5c0f8'
+                           '7f6902e072f457b5143f30602641b3d55cd335988cb36b84'
+                           '376060ecd532e039b742a239434af2d5'), None),
+            (bytes.fromhex('8c0511f4c6e597c6ac6315d8f0362e225f3c501495ba23b8'
+                           '68c005174dc4ee71115b59f9e60cd9532fa33e0f75aefe30'
+                           '225c583a186cd82bd4daea9724a3d3b8'), 64),
+            (bytes.fromhex('9d9e9c4cd21fe4be24d5b8244c759665'), None),],
+    }
+
+    @unittest.skipUnless(hasattr(hashlib, 'pbkdf2_hmac'),
+                         'pbkdf2_hmac required for this test.')
+    def test_pbkdf2_hmac(self):
+        pbkdf2 = hashlib.pbkdf2_hmac
+
+        for digest_name, results in self.pbkdf2_results.items():
+            for i, vector in enumerate(self.pbkdf2_test_vectors):
+                password, salt, rounds, dklen = vector
+                expected, overwrite_dklen = results[i]
+                if overwrite_dklen:
+                    dklen = overwrite_dklen
+                out = pbkdf2(digest_name, password, salt, rounds, dklen)
+                self.assertEqual(out, expected,
+                                 (digest_name, password, salt, rounds, dklen))
+                out = pbkdf2(digest_name, memoryview(password),
+                             memoryview(salt), rounds, dklen)
+                out = pbkdf2(digest_name, bytearray(password),
+                             bytearray(salt), rounds, dklen)
+                self.assertEqual(out, expected)
+                if dklen is None:
+                    out = pbkdf2(digest_name, password, salt, rounds)
+                    self.assertEqual(out, expected,
+                                     (digest_name, password, salt, rounds))
+
+        self.assertRaises(TypeError, pbkdf2, b'sha1', b'pass', b'salt', 1)
+        self.assertRaises(TypeError, pbkdf2, 'sha1', 'pass', 'salt', 1)
+        self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', 0)
+        self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', -1)
+        self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', 1, 0)
+        self.assertRaises(ValueError, pbkdf2, 'sha1', b'pass', b'salt', 1, -1)
+        with self.assertRaisesRegex(ValueError, 'unsupported hash type'):
+            pbkdf2('unknown', b'pass', b'salt', 1)
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,9 @@
 Library
 -------
 
+- Issue #18582: Add 'pbkdf2_hmac' to the hashlib module. It implements PKCS#5
+  password-based key derivation function 2 with HMAC as pseudorandom function.
+
 - Issue #19131: The aifc module now correctly reads and writes sampwidth of
   compressed streams.
 
diff --git a/Modules/_hashopenssl.c b/Modules/_hashopenssl.c
--- a/Modules/_hashopenssl.c
+++ b/Modules/_hashopenssl.c
@@ -22,6 +22,7 @@
 #include <openssl/evp.h>
 /* We use the object interface to discover what hashes OpenSSL supports. */
 #include <openssl/objects.h>
+#include "openssl/err.h"
 
 #define MUNCH_SIZE INT_MAX
 
@@ -61,6 +62,34 @@
 DEFINE_CONSTS_FOR_NEW(sha512)
 #endif
 
+static PyObject *
+_setException(PyObject *exc)
+{
+    unsigned long errcode;
+    const char *lib, *func, *reason;
+
+    errcode = ERR_peek_last_error();
+    if (!errcode) {
+        PyErr_SetString(exc, "unknown reasons");
+        return NULL;
+    }
+    ERR_clear_error();
+
+    lib = ERR_lib_error_string(errcode);
+    func = ERR_func_error_string(errcode);
+    reason = ERR_reason_error_string(errcode);
+
+    if (lib && func) {
+        PyErr_Format(exc, "[%s: %s] %s", lib, func, reason);
+    }
+    else if (lib) {
+        PyErr_Format(exc, "[%s] %s", lib, reason);
+    }
+    else {
+        PyErr_SetString(exc, reason);
+    }
+    return NULL;
+}
 
 static EVPobject *
 newEVPobject(PyObject *name)
@@ -466,6 +495,109 @@
     return ret_obj;
 }
 
+#if (OPENSSL_VERSION_NUMBER >= 0x10000000 && !defined(OPENSSL_NO_HMAC) \
+     && !defined(OPENSSL_NO_SHA))
+#define PY_PBKDF2_HMAC 1
+
+PyDoc_STRVAR(pbkdf2_hmac__doc__,
+"pbkdf2_hmac(hash_name, password, salt, iterations, dklen=None) -> key\n\
+\n\
+Password based key derivation function 2 (PKCS #5 v2.0) with HMAC as\n\
+pseudorandom function.");
+
+static PyObject *
+pbkdf2_hmac(PyObject *self, PyObject *args, PyObject *kwdict)
+{
+    static char *kwlist[] = {"hash_name", "password", "salt", "iterations",
+                             "dklen", NULL};
+    PyObject *key_obj = NULL, *dklen_obj = Py_None;
+    char *name, *key;
+    Py_buffer password, salt;
+    long iterations, dklen;
+    int retval;
+    const EVP_MD *digest;
+
+    if (!PyArg_ParseTupleAndKeywords(args, kwdict, "sy*y*l|O:pbkdf2_hmac",
+                                     kwlist, &name, &password, &salt,
+                                     &iterations, &dklen_obj)) {
+        return NULL;
+    }
+
+    digest = EVP_get_digestbyname(name);
+    if (digest == NULL) {
+        PyErr_SetString(PyExc_ValueError, "unsupported hash type");
+        goto end;
+    }
+
+    if (password.len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "password is too long.");
+        goto end;
+    }
+
+    if (salt.len > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "salt is too long.");
+        goto end;
+    }
+
+    if (iterations < 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "iteration value must be greater than 0.");
+        goto end;
+    }
+    if (iterations > INT_MAX) {
+        PyErr_SetString(PyExc_OverflowError,
+                        "iteration value is too great.");
+        goto end;
+    }
+
+    if (dklen_obj == Py_None) {
+        dklen = EVP_MD_size(digest);
+    } else {
+        dklen = PyLong_AsLong(dklen_obj);
+        if ((dklen == -1) && PyErr_Occurred()) {
+            goto end;
+        }
+    }
+    if (dklen < 1) {
+        PyErr_SetString(PyExc_ValueError,
+                        "key length must be greater than 0.");
+        goto end;
+    }
+    if (dklen > INT_MAX) {
+        /* INT_MAX is always smaller than dkLen max (2^32 - 1) * hLen */
+        PyErr_SetString(PyExc_OverflowError,
+                        "key length is too great.");
+        goto end;
+    }
+
+    key_obj = PyBytes_FromStringAndSize(NULL, dklen);
+    if (key_obj == NULL) {
+        goto end;
+    }
+    key = PyBytes_AS_STRING(key_obj);
+
+    Py_BEGIN_ALLOW_THREADS
+    retval = PKCS5_PBKDF2_HMAC((char*)password.buf, password.len,
+                               (unsigned char *)salt.buf, salt.len,
+                               iterations, digest, dklen,
+                               (unsigned char *)key);
+    Py_END_ALLOW_THREADS
+
+    if (!retval) {
+        Py_CLEAR(key_obj);
+        _setException(PyExc_ValueError);
+        goto end;
+    }
+
+  end:
+    PyBuffer_Release(&password);
+    PyBuffer_Release(&salt);
+    return key_obj;
+}
+
+#endif
 
 /* State for our callback function so that it can accumulate a result. */
 typedef struct _internal_name_mapper_state {
@@ -588,6 +720,10 @@
 
 static struct PyMethodDef EVP_functions[] = {
     {"new", (PyCFunction)EVP_new, METH_VARARGS|METH_KEYWORDS, EVP_new__doc__},
+#ifdef PY_PBKDF2_HMAC
+    {"pbkdf2_hmac", (PyCFunction)pbkdf2_hmac, METH_VARARGS|METH_KEYWORDS,
+     pbkdf2_hmac__doc__},
+#endif
     CONSTRUCTOR_METH_DEF(md5),
     CONSTRUCTOR_METH_DEF(sha1),
 #ifdef _OPENSSL_SUPPORTS_SHA2

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list