[Python-checkins] gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665)
serhiy-storchaka
webhook-mailer at python.org
Fri Apr 22 11:35:32 EDT 2022
https://github.com/python/cpython/commit/6ccfa31421393910b52936e0447625db06f2a655
commit: 6ccfa31421393910b52936e0447625db06f2a655
branch: main
author: Serhiy Storchaka <storchaka at gmail.com>
committer: serhiy-storchaka <storchaka at gmail.com>
date: 2022-04-22T18:35:28+03:00
summary:
gh-90568: Fix exception type for \N with a named sequence in RE (GH-91665)
re.error is now raised instead of TypeError.
files:
A Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst
M Lib/re/_parser.py
M Lib/test/test_re.py
diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py
index f191f809a1491..6588862493077 100644
--- a/Lib/re/_parser.py
+++ b/Lib/re/_parser.py
@@ -333,7 +333,7 @@ def _class_escape(source, escape):
charname = source.getuntil('}', 'character name')
try:
c = ord(unicodedata.lookup(charname))
- except KeyError:
+ except (KeyError, TypeError):
raise source.error("undefined character name %r" % charname,
len(charname) + len(r'\N{}')) from None
return LITERAL, c
@@ -393,7 +393,7 @@ def _escape(source, escape, state):
charname = source.getuntil('}', 'character name')
try:
c = ord(unicodedata.lookup(charname))
- except KeyError:
+ except (KeyError, TypeError):
raise source.error("undefined character name %r" % charname,
len(charname) + len(r'\N{}')) from None
return LITERAL, c
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 781bfd6ea2eda..2d3fef8589e2a 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -772,6 +772,10 @@ def test_named_unicode_escapes(self):
"undefined character name 'SPAM'", 0)
self.checkPatternError(r'[\N{SPAM}]',
"undefined character name 'SPAM'", 1)
+ self.checkPatternError(r'\N{KEYCAP NUMBER SIGN}',
+ "undefined character name 'KEYCAP NUMBER SIGN'", 0)
+ self.checkPatternError(r'[\N{KEYCAP NUMBER SIGN}]',
+ "undefined character name 'KEYCAP NUMBER SIGN'", 1)
self.checkPatternError(br'\N{LESS-THAN SIGN}', r'bad escape \N', 0)
self.checkPatternError(br'[\N{LESS-THAN SIGN}]', r'bad escape \N', 1)
diff --git a/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst
new file mode 100644
index 0000000000000..4411c715830e2
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-18-16-31-33.gh-issue-90568.9kiU7o.rst
@@ -0,0 +1,3 @@
+Parsing ``\N`` escapes of Unicode Named Character Sequences in a
+:mod:`regular expression <re>` now raises :exc:`re.error` instead of
+``TypeError``.
More information about the Python-checkins
mailing list