[Python-checkins] cpython (2.7): Issue #22088: Clarify base-64 alphabets and which characters are discarded
martin.panter
python-checkins at python.org
Tue Feb 23 20:19:08 EST 2016
https://hg.python.org/cpython/rev/c62526580ff0
changeset: 100306:c62526580ff0
branch: 2.7
parent: 100303:52a8c1965750
user: Martin Panter <vadmium+py at gmail.com>
date: Tue Feb 23 22:30:50 2016 +0000
summary:
Issue #22088: Clarify base-64 alphabets and which characters are discarded
* There are only two base-64 alphabets defined by the RFCs, not three
* Due to the internal translation, plus (+) and slash (/) are never discarded
* standard_b64decode() and urlsafe_b64decode() discard non-alphabet characters as well
files:
Doc/library/base64.rst | 15 +++++++++------
Lib/base64.py | 29 ++++++++++++++++-------------
Lib/test/test_base64.py | 11 +++++++++++
3 files changed, 36 insertions(+), 19 deletions(-)
diff --git a/Doc/library/base64.rst b/Doc/library/base64.rst
--- a/Doc/library/base64.rst
+++ b/Doc/library/base64.rst
@@ -16,8 +16,8 @@
encoding algorithm is not the same as the :program:`uuencode` program.
There are two interfaces provided by this module. The modern interface supports
-encoding and decoding string objects using all three :rfc:`3548` defined
-alphabets (normal, URL-safe, and filesystem-safe). The legacy
+encoding and decoding string objects using both base-64 alphabets defined
+in :rfc:`3548` (normal, and URL- and filesystem-safe). The legacy
interface provides for encoding and decoding to and from file-like objects as
well as strings, but only using the Base64 standard alphabet.
@@ -26,7 +26,7 @@
.. function:: b64encode(s[, altchars])
- Encode a string use Base64.
+ Encode a string using Base64.
*s* is the string to encode. Optional *altchars* must be a string of at least
length 2 (additional characters are ignored) which specifies an alternative
@@ -46,7 +46,8 @@
alphabet used instead of the ``+`` and ``/`` characters.
The decoded string is returned. A :exc:`TypeError` is raised if *s* is
- incorrectly padded. Non-base64-alphabet characters are
+ incorrectly padded. Characters that are neither
+ in the normal base-64 alphabet nor the alternative alphabet are
discarded prior to the padding check.
@@ -62,14 +63,16 @@
.. function:: urlsafe_b64encode(s)
- Encode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of
+ Encode string *s* using the URL- and filesystem-safe
+ alphabet, which substitutes ``-`` instead of
``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet. The result
can still contain ``=``.
.. function:: urlsafe_b64decode(s)
- Decode string *s* using a URL-safe alphabet, which substitutes ``-`` instead of
+ Decode string *s* using the URL- and filesystem-safe
+ alphabet, which substitutes ``-`` instead of
``+`` and ``_`` instead of ``/`` in the standard Base64 alphabet.
diff --git a/Lib/base64.py b/Lib/base64.py
--- a/Lib/base64.py
+++ b/Lib/base64.py
@@ -65,8 +65,9 @@
alternative alphabet used instead of the '+' and '/' characters.
The decoded string is returned. A TypeError is raised if s is
- incorrectly padded. Non-base64-alphabet characters are discarded prior
- to the padding check.
+ incorrectly padded. Characters that are neither in the normal base-64
+ alphabet nor the alternative alphabet are discarded prior to the padding
+ check.
"""
if altchars is not None:
s = s.translate(string.maketrans(altchars[:2], '+/'))
@@ -87,9 +88,10 @@
def standard_b64decode(s):
"""Decode a string encoded with the standard Base64 alphabet.
- s is the string to decode. The decoded string is returned. A TypeError
- is raised if the string is incorrectly padded or if there are non-alphabet
- characters present in the string.
+ Argument s is the string to decode. The decoded string is returned. A
+ TypeError is raised if the string is incorrectly padded. Characters that
+ are not in the standard alphabet are discarded prior to the padding
+ check.
"""
return b64decode(s)
@@ -97,19 +99,20 @@
_urlsafe_decode_translation = string.maketrans(b'-_', b'+/')
def urlsafe_b64encode(s):
- """Encode a string using a url-safe Base64 alphabet.
+ """Encode a string using the URL- and filesystem-safe Base64 alphabet.
- s is the string to encode. The encoded string is returned. The alphabet
- uses '-' instead of '+' and '_' instead of '/'.
+ Argument s is the string to encode. The encoded string is returned. The
+ alphabet uses '-' instead of '+' and '_' instead of '/'.
"""
return b64encode(s).translate(_urlsafe_encode_translation)
def urlsafe_b64decode(s):
- """Decode a string encoded with the standard Base64 alphabet.
+ """Decode a string using the URL- and filesystem-safe Base64 alphabet.
- s is the string to decode. The decoded string is returned. A TypeError
- is raised if the string is incorrectly padded or if there are non-alphabet
- characters present in the string.
+ Argument s is the string to decode. The decoded string is returned. A
+ TypeError is raised if the string is incorrectly padded. Characters that
+ are not in the URL-safe base-64 alphabet, and are not a plus '+' or slash
+ '/', are discarded prior to the padding check.
The alphabet uses '-' instead of '+' and '_' instead of '/'.
"""
@@ -267,7 +270,7 @@
a lowercase alphabet is acceptable as input. For security purposes, the
default is False.
- The decoded string is returned. A TypeError is raised if s were
+ The decoded string is returned. A TypeError is raised if s is
incorrectly padded or if there are non-alphabet characters present in the
string.
"""
diff --git a/Lib/test/test_base64.py b/Lib/test/test_base64.py
--- a/Lib/test/test_base64.py
+++ b/Lib/test/test_base64.py
@@ -153,6 +153,13 @@
(b'YWJj\nYWI=', b'abcab'))
for bstr, res in tests:
self.assertEqual(base64.b64decode(bstr), res)
+ self.assertEqual(base64.standard_b64decode(bstr), res)
+ self.assertEqual(base64.urlsafe_b64decode(bstr), res)
+
+ # Normal alphabet characters not discarded when alternative given
+ res = b'\xFB\xEF\xBE\xFF\xFF\xFF'
+ self.assertEqual(base64.b64decode(b'++[[//]]', b'[]'), res)
+ self.assertEqual(base64.urlsafe_b64decode(b'++--//__'), res)
def test_b32encode(self):
eq = self.assertEqual
@@ -220,6 +227,10 @@
eq(base64.b16decode('0102abcdef', True), '\x01\x02\xab\xcd\xef')
# Non-bytes
eq(base64.b16decode(bytearray("0102ABCDEF")), '\x01\x02\xab\xcd\xef')
+ # Non-alphabet characters
+ self.assertRaises(TypeError, base64.b16decode, '0102AG')
+ # Incorrect "padding"
+ self.assertRaises(TypeError, base64.b16decode, '010')
--
Repository URL: https://hg.python.org/cpython
More information about the Python-checkins
mailing list