[Python-checkins] (no subject)

Mon Aug 10 10:48:29 EDT 2020

To: python-checkins at python.org
Subject: bpo-16995: add support for base32 extended hex (base32hex) (GH-20441)
Content-Type: text/plain; charset="utf-8"
Content-Transfer-Encoding: quoted-printable
MIME-Version: 1.0

https://github.com/python/cpython/commit/4ce6faa6c9591de6079347eccc9e61ae4e8d=
9e31
commit: 4ce6faa6c9591de6079347eccc9e61ae4e8d9e31
branch: master
author: Filipe La=C3=ADns <lains at archlinux.org>
committer: GitHub <noreply at github.com>
date: 2020-08-10T07:48:20-07:00
summary:

bpo-16995: add support for base32 extended hex (base32hex) (GH-20441)



cc @pganssle

Automerge-Triggered-By: @pganssle

files:
A Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
M Doc/library/base64.rst
M Doc/whatsnew/3.10.rst
M Lib/base64.py
M Lib/test/test_base64.py

diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
index 1ff22a00d6199..2f24bb63912fb 100644
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -124,7 +124,7 @@ The modern interface provides:
    whether a lowercase alphabet is acceptable as input.  For security purpos=
es,
    the default is ``False``.
=20
-   :rfc:`3548` allows for optional mapping of the digit 0 (zero) to the lett=
er O
+   :rfc:`4648` allows for optional mapping of the digit 0 (zero) to the lett=
er O
    (oh), and for optional mapping of the digit 1 (one) to either the letter =
I (eye)
    or letter L (el).  The optional argument *map01* when not ``None``, speci=
fies
    which letter the digit 1 should be mapped to (when *map01* is not ``None`=
`, the
@@ -136,6 +136,27 @@ The modern interface provides:
    input.
=20
=20
+.. function:: b32hexencode(s)
+
+   Similar to :func:`b32encode` but uses the Extended Hex Alphabet, as defin=
ed in
+   :rfc:`4648`.
+
+   .. versionadded:: 3.10
+
+
+.. function:: b32hexdecode(s, casefold=3DFalse)
+
+   Similar to :func:`b32decode` but uses the Extended Hex Alphabet, as defin=
ed in
+   :rfc:`4648`.
+
+   This version does not allow the digit 0 (zero) to the letter O (oh) and d=
igit
+   1 (one) to either the letter I (eye) or letter L (el) mappings; all these
+   characters are included in the Extended Hex Alphabet and are not
+   interchangeable.
+
+   .. versionadded:: 3.10
+
+
 .. function:: b16encode(s)
=20
    Encode the :term:`bytes-like object` *s* using Base16 and return the
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index 2af0ea3f4dd64..eb5ae01a7c04d 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -103,6 +103,12 @@ New Modules
 Improved Modules
 =3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D=3D
=20
+base64
+------
+
+Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support t=
he
+Base32 Encoding with Extended Hex Alphabet.
+
 curses
 ------
=20
diff --git a/Lib/base64.py b/Lib/base64.py
index a28109f8a7f9c..539ad16f0e86d 100755
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -16,7 +16,7 @@
     'encode', 'decode', 'encodebytes', 'decodebytes',
     # Generalized interface for other encodings
     'b64encode', 'b64decode', 'b32encode', 'b32decode',
-    'b16encode', 'b16decode',
+    'b32hexencode', 'b32hexdecode', 'b16encode', 'b16decode',
     # Base85 and Ascii85 encodings
     'b85encode', 'b85decode', 'a85encode', 'a85decode',
     # Standard Base64 encoding
@@ -135,19 +135,40 @@ def urlsafe_b64decode(s):
=20
=20
 # Base32 encoding/decoding must be done in Python
+_B32_ENCODE_DOCSTRING =3D '''
+Encode the bytes-like objects using {encoding} and return a bytes object.
+'''
+_B32_DECODE_DOCSTRING =3D '''
+Decode the {encoding} encoded bytes-like object or ASCII string s.
+
+Optional casefold is a flag specifying whether a lowercase alphabet is
+acceptable as input.  For security purposes, the default is False.
+{extra_args}
+The result is returned as a bytes object.  A binascii.Error is raised if
+the input is incorrectly padded or if there are non-alphabet
+characters present in the input.
+'''
+_B32_DECODE_MAP01_DOCSTRING =3D '''
+RFC 3548 allows for optional mapping of the digit 0 (zero) to the
+letter O (oh), and for optional mapping of the digit 1 (one) to
+either the letter I (eye) or letter L (el).  The optional argument
+map01 when not None, specifies which letter the digit 1 should be
+mapped to (when map01 is not None, the digit 0 is always mapped to
+the letter O).  For security purposes the default is None, so that
+0 and 1 are not allowed in the input.
+'''
 _b32alphabet =3D b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
-_b32tab2 =3D None
-_b32rev =3D None
+_b32hexalphabet =3D b'0123456789ABCDEFGHIJKLMNOPQRSTUV'
+_b32tab2 =3D {}
+_b32rev =3D {}
=20
-def b32encode(s):
-    """Encode the bytes-like object s using Base32 and return a bytes object.
-    """
+def _b32encode(alphabet, s):
     global _b32tab2
     # Delay the initialization of the table to not waste memory
     # if the function is never called
-    if _b32tab2 is None:
-        b32tab =3D [bytes((i,)) for i in _b32alphabet]
-        _b32tab2 =3D [a + b for a in b32tab for b in b32tab]
+    if alphabet not in _b32tab2:
+        b32tab =3D [bytes((i,)) for i in alphabet]
+        _b32tab2[alphabet] =3D [a + b for a in b32tab for b in b32tab]
         b32tab =3D None
=20
     if not isinstance(s, bytes_types):
@@ -158,7 +179,7 @@ def b32encode(s):
         s =3D s + b'\0' * (5 - leftover)  # Don't use +=3D !
     encoded =3D bytearray()
     from_bytes =3D int.from_bytes
-    b32tab2 =3D _b32tab2
+    b32tab2 =3D _b32tab2[alphabet]
     for i in range(0, len(s), 5):
         c =3D from_bytes(s[i: i + 5], 'big')
         encoded +=3D (b32tab2[c >> 30] +           # bits 1 - 10
@@ -177,29 +198,12 @@ def b32encode(s):
         encoded[-1:] =3D b'=3D'
     return bytes(encoded)
=20
-def b32decode(s, casefold=3DFalse, map01=3DNone):
-    """Decode the Base32 encoded bytes-like object or ASCII string s.
-
-    Optional casefold is a flag specifying whether a lowercase alphabet is
-    acceptable as input.  For security purposes, the default is False.
-
-    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
-    letter O (oh), and for optional mapping of the digit 1 (one) to
-    either the letter I (eye) or letter L (el).  The optional argument
-    map01 when not None, specifies which letter the digit 1 should be
-    mapped to (when map01 is not None, the digit 0 is always mapped to
-    the letter O).  For security purposes the default is None, so that
-    0 and 1 are not allowed in the input.
-
-    The result is returned as a bytes object.  A binascii.Error is raised if
-    the input is incorrectly padded or if there are non-alphabet
-    characters present in the input.
-    """
+def _b32decode(alphabet, s, casefold=3DFalse, map01=3DNone):
     global _b32rev
     # Delay the initialization of the table to not waste memory
     # if the function is never called
-    if _b32rev is None:
-        _b32rev =3D {v: k for k, v in enumerate(_b32alphabet)}
+    if alphabet not in _b32rev:
+        _b32rev[alphabet] =3D {v: k for k, v in enumerate(alphabet)}
     s =3D _bytes_from_decode_data(s)
     if len(s) % 8:
         raise binascii.Error('Incorrect padding')
@@ -220,7 +224,7 @@ def b32decode(s, casefold=3DFalse, map01=3DNone):
     padchars =3D l - len(s)
     # Now decode the full quanta
     decoded =3D bytearray()
-    b32rev =3D _b32rev
+    b32rev =3D _b32rev[alphabet]
     for i in range(0, len(s), 8):
         quanta =3D s[i: i + 8]
         acc =3D 0
@@ -241,6 +245,26 @@ def b32decode(s, casefold=3DFalse, map01=3DNone):
     return bytes(decoded)
=20
=20
+def b32encode(s):
+    return _b32encode(_b32alphabet, s)
+b32encode.__doc__ =3D _B32_ENCODE_DOCSTRING.format(encoding=3D'base32')
+
+def b32decode(s, casefold=3DFalse, map01=3DNone):
+    return _b32decode(_b32alphabet, s, casefold, map01)
+b32decode.__doc__ =3D _B32_DECODE_DOCSTRING.format(encoding=3D'base32',
+                                        extra_args=3D_B32_DECODE_MAP01_DOCST=
RING)
+
+def b32hexencode(s):
+    return _b32encode(_b32hexalphabet, s)
+b32hexencode.__doc__ =3D _B32_ENCODE_DOCSTRING.format(encoding=3D'base32hex')
+
+def b32hexdecode(s, casefold=3DFalse):
+    # base32hex does not have the 01 mapping
+    return _b32decode(_b32hexalphabet, s, casefold)
+b32hexdecode.__doc__ =3D _B32_DECODE_DOCSTRING.format(encoding=3D'base32hex',
+                                                    extra_args=3D'')
+
+
 # RFC 3548, Base 16 Alphabet specifies uppercase, but hexlify() returns
 # lowercase.  The RFC also recommends against accepting input case
 # insensitively.
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
index 1f67e46cd2267..4f62c4115f60a 100644
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -351,6 +351,76 @@ def test_b32decode_error(self):
                 with self.assertRaises(binascii.Error):
                     base64.b32decode(data.decode('ascii'))
=20
+    def test_b32hexencode(self):
+        test_cases =3D [
+            # to_encode, expected
+            (b'',      b''),
+            (b'\x00',  b'00=3D=3D=3D=3D=3D=3D'),
+            (b'a',     b'C4=3D=3D=3D=3D=3D=3D'),
+            (b'ab',    b'C5H0=3D=3D=3D=3D'),
+            (b'abc',   b'C5H66=3D=3D=3D'),
+            (b'abcd',  b'C5H66P0=3D'),
+            (b'abcde', b'C5H66P35'),
+        ]
+        for to_encode, expected in test_cases:
+            with self.subTest(to_decode=3Dto_encode):
+                self.assertEqual(base64.b32hexencode(to_encode), expected)
+
+    def test_b32hexencode_other_types(self):
+        self.check_other_types(base64.b32hexencode, b'abcd', b'C5H66P0=3D')
+        self.check_encode_type_errors(base64.b32hexencode)
+
+    def test_b32hexdecode(self):
+        test_cases =3D [
+            # to_decode, expected, casefold
+            (b'',         b'',      False),
+            (b'00=3D=3D=3D=3D=3D=3D', b'\x00',  False),
+            (b'C4=3D=3D=3D=3D=3D=3D', b'a',     False),
+            (b'C5H0=3D=3D=3D=3D', b'ab',    False),
+            (b'C5H66=3D=3D=3D', b'abc',   False),
+            (b'C5H66P0=3D', b'abcd',  False),
+            (b'C5H66P35', b'abcde', False),
+            (b'',         b'',      True),
+            (b'00=3D=3D=3D=3D=3D=3D', b'\x00',  True),
+            (b'C4=3D=3D=3D=3D=3D=3D', b'a',     True),
+            (b'C5H0=3D=3D=3D=3D', b'ab',    True),
+            (b'C5H66=3D=3D=3D', b'abc',   True),
+            (b'C5H66P0=3D', b'abcd',  True),
+            (b'C5H66P35', b'abcde', True),
+            (b'c4=3D=3D=3D=3D=3D=3D', b'a',     True),
+            (b'c5h0=3D=3D=3D=3D', b'ab',    True),
+            (b'c5h66=3D=3D=3D', b'abc',   True),
+            (b'c5h66p0=3D', b'abcd',  True),
+            (b'c5h66p35', b'abcde', True),
+        ]
+        for to_decode, expected, casefold in test_cases:
+            with self.subTest(to_decode=3Dto_decode, casefold=3Dcasefold):
+                self.assertEqual(base64.b32hexdecode(to_decode, casefold),
+                                 expected)
+                self.assertEqual(base64.b32hexdecode(to_decode.decode('ascii=
'),
+                                 casefold), expected)
+
+    def test_b32hexdecode_other_types(self):
+        self.check_other_types(base64.b32hexdecode, b'C5H66=3D=3D=3D', b'abc=
')
+        self.check_decode_type_errors(base64.b32hexdecode)
+
+    def test_b32hexdecode_error(self):
+        tests =3D [b'abc', b'ABCDEF=3D=3D', b'=3D=3DABCDEF', b'c4=3D=3D=3D=
=3D=3D=3D']
+        prefixes =3D [b'M', b'ME', b'MFRA', b'MFRGG', b'MFRGGZA', b'MFRGGZDF=
']
+        for i in range(0, 17):
+            if i:
+                tests.append(b'=3D'*i)
+            for prefix in prefixes:
+                if len(prefix) + i !=3D 8:
+                    tests.append(prefix + b'=3D'*i)
+        for data in tests:
+            with self.subTest(to_decode=3Ddata):
+                with self.assertRaises(binascii.Error):
+                    base64.b32hexdecode(data)
+                with self.assertRaises(binascii.Error):
+                    base64.b32hexdecode(data.decode('ascii'))
+
+
     def test_b16encode(self):
         eq =3D self.assertEqual
         eq(base64.b16encode(b'\x01\x02\xab\xcd\xef'), b'0102ABCDEF')
diff --git a/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rs=
t b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
new file mode 100644
index 0000000000000..88b95998d085f
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-05-27-00-09-52.bpo-16995.4niOT7.rst
@@ -0,0 +1,2 @@
+Add :func:`base64.b32hexencode` and :func:`base64.b32hexdecode` to support t=
he
+Base32 Encoding with Extended Hex Alphabet.