[Python-checkins] gh-91700: Validate the group number in conditional expression in RE (GH-91702)
serhiy-storchaka
webhook-mailer at python.org
Fri Apr 22 12:53:15 EDT 2022
https://github.com/python/cpython/commit/48ec61a89a959071206549819448405c2cea61b0
commit: 48ec61a89a959071206549819448405c2cea61b0
branch: main
author: Serhiy Storchaka <storchaka at gmail.com>
committer: serhiy-storchaka <storchaka at gmail.com>
date: 2022-04-22T19:53:10+03:00
summary:
gh-91700: Validate the group number in conditional expression in RE (GH-91702)
In a conditional expression (?(group)...), an appropriate re.error is now
raised if the group number refers to an undefined group.
Previously, it raised RuntimeError: invalid SRE code.
files:
A Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst
M Lib/re/_parser.py
M Lib/test/test_re.py
diff --git a/Lib/re/_parser.py b/Lib/re/_parser.py
index 6588862493077..60ec3e8ba8bd5 100644
--- a/Lib/re/_parser.py
+++ b/Lib/re/_parser.py
@@ -77,6 +77,7 @@ def __init__(self):
self.groupdict = {}
self.groupwidths = [None] # group 0
self.lookbehindgroups = None
+ self.grouprefpos = {}
@property
def groups(self):
return len(self.groupwidths)
@@ -795,6 +796,10 @@ def _parse(source, state, verbose, nested, first=False):
if condgroup >= MAXGROUPS:
msg = "invalid group reference %d" % condgroup
raise source.error(msg, len(condname) + 1)
+ if condgroup not in state.grouprefpos:
+ state.grouprefpos[condgroup] = (
+ source.tell() - len(condname) - 1
+ )
state.checklookbehindgroup(condgroup, source)
item_yes = _parse(source, state, verbose, nested + 1)
if source.match("|"):
@@ -975,6 +980,11 @@ def parse(str, flags=0, state=None):
assert source.next == ")"
raise source.error("unbalanced parenthesis")
+ for g in p.state.grouprefpos:
+ if g >= p.state.groups:
+ msg = "invalid group reference %d" % g
+ raise error(msg, str, p.state.grouprefpos[g])
+
if flags & SRE_FLAG_DEBUG:
p.dump()
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index 2d3fef8589e2a..700275063f0f1 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -593,6 +593,8 @@ def test_re_groupref_exists_errors(self):
self.checkPatternError(r'()(?(1)a|b|c)',
'conditional backref with more than '
'two branches', 10)
+ self.checkPatternError(r'()(?(2)a)',
+ "invalid group reference 2", 5)
def test_re_groupref_overflow(self):
from re._constants import MAXGROUPS
diff --git a/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst b/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst
new file mode 100644
index 0000000000000..73b106869697b
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2022-04-19-17-30-17.gh-issue-91700.MRJi6m.rst
@@ -0,0 +1,4 @@
+Compilation of a regular expression containing a conditional expression
+``(?(group)...)`` now raises an appropriate :exc:`re.error` if the group
+number refers to an undefined group. Previously, an internal RuntimeError was
+raised.
More information about the Python-checkins
mailing list