[Python-checkins] gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612)

serhiy-storchaka webhook-mailer at python.org
Wed Aug 16 03:43:49 EDT 2023


https://github.com/python/cpython/commit/abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9
commit: abd9cc52d94b8e2835322b62c29f09bb0e6fcfe9
branch: main
author: SKO <41810398+uyw4687 at users.noreply.github.com>
committer: serhiy-storchaka <storchaka at gmail.com>
date: 2023-08-16T10:43:45+03:00
summary:

gh-100061: Proper fix of the bug in the matching of possessive quantifiers (GH-102612)

Restore the global Input Stream pointer after trying to match a sub-pattern.

Co-authored-by: Ma Lin <animalize at users.noreply.github.com>

files:
A Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst
M Lib/re/_compiler.py
M Lib/test/test_re.py
M Modules/_sre/sre_lib.h

diff --git a/Lib/re/_compiler.py b/Lib/re/_compiler.py
index f5fd160ba0043..d0a4c55caf6e4 100644
--- a/Lib/re/_compiler.py
+++ b/Lib/re/_compiler.py
@@ -100,13 +100,6 @@ def _compile(code, pattern, flags):
                 emit(ANY_ALL)
             else:
                 emit(ANY)
-        elif op is POSSESSIVE_REPEAT:
-            # gh-106052: Possessive quantifiers do not work when the
-            # subpattern contains backtracking, i.e. "(?:ab?c)*+".
-            # Implement it as equivalent greedy qualifier in atomic group.
-            p = [(MAX_REPEAT, av)]
-            p = [(ATOMIC_GROUP, p)]
-            _compile(code, p, flags)
         elif op in REPEATING_CODES:
             if _simple(av[2]):
                 emit(REPEATING_CODES[op][2])
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
index bf3698ac78a88..042f97f57ecf1 100644
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -2342,7 +2342,17 @@ def test_bug_gh91616(self):
         self.assertTrue(re.fullmatch(r'(?s:(?>.*?\.).*)\Z', "a.txt")) # reproducer
         self.assertTrue(re.fullmatch(r'(?s:(?=(?P<g0>.*?\.))(?P=g0).*)\Z', "a.txt"))
 
-    def test_bug_gh106052(self):
+    def test_bug_gh100061(self):
+        # gh-100061
+        self.assertEqual(re.match('(?>(?:.(?!D))+)', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?:.(?!D))++', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?>(?:.(?!D))*)', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?:.(?!D))*+', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?>(?:.(?!D))?)', 'CDE').span(), (0, 0))
+        self.assertEqual(re.match('(?:.(?!D))?+', 'CDE').span(), (0, 0))
+        self.assertEqual(re.match('(?>(?:.(?!D)){1,3})', 'ABCDE').span(), (0, 2))
+        self.assertEqual(re.match('(?:.(?!D)){1,3}+', 'ABCDE').span(), (0, 2))
+        # gh-106052
         self.assertEqual(re.match("(?>(?:ab?c)+)", "aca").span(), (0, 2))
         self.assertEqual(re.match("(?:ab?c)++", "aca").span(), (0, 2))
         self.assertEqual(re.match("(?>(?:ab?c)*)", "aca").span(), (0, 2))
@@ -2451,7 +2461,6 @@ def test_atomic_group(self):
 17: SUCCESS
 ''')
 
-    @unittest.expectedFailure  # gh-106052
     def test_possesive_repeat_one(self):
         self.assertEqual(get_debug_out(r'a?+'), '''\
 POSSESSIVE_REPEAT 0 1
@@ -2464,7 +2473,6 @@ def test_possesive_repeat_one(self):
 12: SUCCESS
 ''')
 
-    @unittest.expectedFailure  # gh-106052
     def test_possesive_repeat(self):
         self.assertEqual(get_debug_out(r'(?:ab)?+'), '''\
 POSSESSIVE_REPEAT 0 1
diff --git a/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst
new file mode 100644
index 0000000000000..dfed34f6ae976
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2023-03-14-01-19-57.gh-issue-100061.CiXJYn.rst
@@ -0,0 +1,2 @@
+Fix a bug that causes wrong matches for regular expressions with possessive
+quantifiers.
diff --git a/Modules/_sre/sre_lib.h b/Modules/_sre/sre_lib.h
index c1a774f69090b..ae80009fd63bb 100644
--- a/Modules/_sre/sre_lib.h
+++ b/Modules/_sre/sre_lib.h
@@ -1336,6 +1336,10 @@ SRE(match)(SRE_STATE* state, const SRE_CODE* pattern, int toplevel)
                     MARK_POP(ctx->lastmark);
                     LASTMARK_RESTORE();
 
+                    /* Restore the global Input Stream pointer
+                       since it can change after jumps. */
+                    state->ptr = ptr;
+
                     /* We have sufficient matches, so exit loop. */
                     break;
                 }



More information about the Python-checkins mailing list