[Python-checkins] bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
Hai Shi
webhook-mailer at python.org
Wed Oct 14 11:43:39 EDT 2020
https://github.com/python/cpython/commit/c5b049b91ca50c615f9a5425055c2b79a82ac547
commit: c5b049b91ca50c615f9a5425055c2b79a82ac547
branch: master
author: Hai Shi <shihai1992 at gmail.com>
committer: GitHub <noreply at github.com>
date: 2020-10-14T17:43:31+02:00
summary:
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII characters (GH-22219)
files:
A Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
M Doc/whatsnew/3.10.rst
M Lib/encodings/__init__.py
M Lib/test/test_codecs.py
diff --git a/Doc/whatsnew/3.10.rst b/Doc/whatsnew/3.10.rst
index c8ddcd2d24296..738ef974e7867 100644
--- a/Doc/whatsnew/3.10.rst
+++ b/Doc/whatsnew/3.10.rst
@@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
support is provided by the underlying ncurses library.
(Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)
+encodings
+---------
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
+(Contributed by Hai Shi in :issue:`39337`.)
+
glob
----
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
index ddd5afdcf2dab..4b37d3321c903 100644
--- a/Lib/encodings/__init__.py
+++ b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
         if c.isalnum() or c == '.':
             if punct and chars:
                 chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
             punct = False
         else:
             punct = True
diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
index ddf4e08af6247..09ceef76eb098 100644
--- a/Lib/test/test_codecs.py
+++ b/Lib/test/test_codecs.py
@@ -3417,7 +3417,7 @@ def test_rot13_func(self):
 class CodecNameNormalizationTest(unittest.TestCase):
     """Test codec name normalization"""
-    def test_normalized_encoding(self):
+    def test_codecs_lookup(self):
         FOUND = (1, 2, 3, 4)
         NOT_FOUND = (None, None, None, None)
         def search_function(encoding):
@@ -3439,6 +3439,18 @@ def search_function(encoding):
         self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
+    def test_encodings_normalize_encoding(self):
+        # encodings.normalize_encoding() ignores non-ASCII characters.
+        normalize = encodings.normalize_encoding
+        self.assertEqual(normalize('utf_8'), 'utf_8')
+        self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
+        self.assertEqual(normalize('utf 8'), 'utf_8')
+        # encodings.normalize_encoding() doesn't convert
+        # characters to lower case.
+        self.assertEqual(normalize('UTF 8'), 'UTF_8')
+        self.assertEqual(normalize('utf.8'), 'utf.8')
+        self.assertEqual(normalize('utf...8'), 'utf...8')
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
new file mode 100644
index 0000000000000..c2b4dbe4d12e8
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -0,0 +1 @@
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
More information about the Python-checkins
mailing list