[issue14260] re.groupindex is available for modification and continues to work, having incorrect data inside it

Serhiy Storchaka report at bugs.python.org
Sat Nov 1 16:27:02 CET 2014


Serhiy Storchaka added the comment:

Here are two patches which implement two alternative solutions. They are based on regex code.

The dict-copying patch matches the current regex behavior, but other code needs to be modified to avoid a small slowdown. An artificial example:

$ ./python -m timeit -s 'import re; n = 100; m = re.match("".join("(?P<g%d>.)" % g for g in range(n)), "x" * n); t = ",".join(r"\g<g%d>" % g for g in range(n))' -- 'm.expand(t)'

Without patch: 7.48 msec per loop
With re_groupindex_copy.patch but without modifying _expand: 9.61 msec per loop
With re_groupindex_copy.patch and with modifying _expand: 7.41 msec per loop

While stdlib code can be modified, this patch can cause a small slowdown in some third-party code.

The dict-proxying patch has no performance effect, but it is slightly less compatible: some code can accept a dict but not a dict-like object.

----------
keywords: +patch
Added file: http://bugs.python.org/file37098/re_groupindex_copy.patch
Added file: http://bugs.python.org/file37099/re_groupindex_proxy.patch

_______________________________________
Python tracker <report at bugs.python.org>
<http://bugs.python.org/issue14260>
_______________________________________
-------------- next part --------------
diff -r 193ac288bc7f Lib/csv.py
--- a/Lib/csv.py	Sat Nov 01 11:05:36 2014 +0200
+++ b/Lib/csv.py	Sat Nov 01 16:58:33 2014 +0200
@@ -231,20 +231,21 @@ class Sniffer:
         quotes = {}
         delims = {}
         spaces = 0
+        groupindex = regexp.groupindex
         for m in matches:
-            n = regexp.groupindex['quote'] - 1
+            n = groupindex['quote'] - 1
             key = m[n]
             if key:
                 quotes[key] = quotes.get(key, 0) + 1
             try:
-                n = regexp.groupindex['delim'] - 1
+                n = groupindex['delim'] - 1
                 key = m[n]
             except KeyError:
                 continue
             if key and (delimiters is None or key in delimiters):
                 delims[key] = delims.get(key, 0) + 1
             try:
-                n = regexp.groupindex['space'] - 1
+                n = groupindex['space'] - 1
             except KeyError:
                 continue
             if m[n]:
diff -r 193ac288bc7f Lib/sre_parse.py
--- a/Lib/sre_parse.py	Sat Nov 01 11:05:36 2014 +0200
+++ b/Lib/sre_parse.py	Sat Nov 01 16:58:33 2014 +0200
@@ -783,6 +783,7 @@ def parse_template(source, pattern):
             del literal[:]
         groups.append((len(literals), index))
         literals.append(None)
+    groupindex = pattern.groupindex
     while True:
         this = sget()
         if this is None:
@@ -806,7 +807,7 @@ def parse_template(source, pattern):
                     if not name.isidentifier():
                         raise error("bad character in group name")
                     try:
-                        index = pattern.groupindex[name]
+                        index = groupindex[name]
                     except KeyError:
                         msg = "unknown group name: {0!r}".format(name)
                         raise IndexError(msg)
diff -r 193ac288bc7f Lib/test/test_re.py
--- a/Lib/test/test_re.py	Sat Nov 01 11:05:36 2014 +0200
+++ b/Lib/test/test_re.py	Sat Nov 01 16:58:33 2014 +0200
@@ -493,6 +493,13 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
         self.assertTrue(re.match("(a)", "a").re)
 
+        # Issue 14260.
+        p = re.compile(r'abc(?P<n>def)')
+        self.assertEqual(sorted(p.groupindex), ["n"])
+        self.assertEqual(p.groupindex["n"], 1)
+        p.groupindex["n"] = 0
+        self.assertEqual(p.groupindex["n"], 1)
+
     def test_special_escapes(self):
         self.assertEqual(re.search(r"\b(b.)\b",
                                    "abcd abc bcd bx").group(1), "bx")
diff -r 193ac288bc7f Modules/_sre.c
--- a/Modules/_sre.c	Sat Nov 01 11:05:36 2014 +0200
+++ b/Modules/_sre.c	Sat Nov 01 16:58:33 2014 +0200
@@ -1371,12 +1371,24 @@ static PyMethodDef pattern_methods[] = {
     {NULL, NULL}
 };
 
+/* PatternObject's 'groupindex' method. */
+static PyObject *
+pattern_groupindex(PatternObject *self)
+{
+    return PyDict_Copy(self->groupindex);
+}
+
+static PyGetSetDef pattern_getset[] = {
+    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
+      "A dictionary mapping group names to group numbers."},
+    {NULL}  /* Sentinel */
+};
+
 #define PAT_OFF(x) offsetof(PatternObject, x)
 static PyMemberDef pattern_members[] = {
     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
-    {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
     {NULL}  /* Sentinel */
 };
 
@@ -1409,6 +1421,7 @@ static PyTypeObject Pattern_Type = {
     0,                                  /* tp_iternext */
     pattern_methods,                    /* tp_methods */
     pattern_members,                    /* tp_members */
+    pattern_getset,                     /* tp_getset */
 };
 
 static int _validate(PatternObject *self); /* Forward */
-------------- next part --------------
diff -r 193ac288bc7f Lib/test/test_re.py
--- a/Lib/test/test_re.py	Sat Nov 01 11:05:36 2014 +0200
+++ b/Lib/test/test_re.py	Sat Nov 01 16:48:22 2014 +0200
@@ -493,6 +493,13 @@ class ReTests(unittest.TestCase):
         self.assertEqual(re.match("(a)", "a").regs, ((0, 1), (0, 1)))
         self.assertTrue(re.match("(a)", "a").re)
 
+        # Issue 14260.
+        p = re.compile(r'abc(?P<n>def)')
+        self.assertEqual(sorted(p.groupindex), ["n"])
+        self.assertEqual(p.groupindex["n"], 1)
+        with self.assertRaises(TypeError):
+            p.groupindex["n"] = 0
+
     def test_special_escapes(self):
         self.assertEqual(re.search(r"\b(b.)\b",
                                    "abcd abc bcd bx").group(1), "bx")
diff -r 193ac288bc7f Modules/_sre.c
--- a/Modules/_sre.c	Sat Nov 01 11:05:36 2014 +0200
+++ b/Modules/_sre.c	Sat Nov 01 16:48:22 2014 +0200
@@ -1371,12 +1371,24 @@ static PyMethodDef pattern_methods[] = {
     {NULL, NULL}
 };
 
+/* PatternObject's 'groupindex' method. */
+static PyObject *
+pattern_groupindex(PatternObject *self)
+{
+    return PyDictProxy_New(self->groupindex);
+}
+
+static PyGetSetDef pattern_getset[] = {
+    {"groupindex", (getter)pattern_groupindex, (setter)NULL,
+      "A dictionary mapping group names to group numbers."},
+    {NULL}  /* Sentinel */
+};
+
 #define PAT_OFF(x) offsetof(PatternObject, x)
 static PyMemberDef pattern_members[] = {
     {"pattern",    T_OBJECT,    PAT_OFF(pattern),       READONLY},
     {"flags",      T_INT,       PAT_OFF(flags),         READONLY},
     {"groups",     T_PYSSIZET,  PAT_OFF(groups),        READONLY},
-    {"groupindex", T_OBJECT,    PAT_OFF(groupindex),    READONLY},
     {NULL}  /* Sentinel */
 };
 
@@ -1409,6 +1421,7 @@ static PyTypeObject Pattern_Type = {
     0,                                  /* tp_iternext */
     pattern_methods,                    /* tp_methods */
     pattern_members,                    /* tp_members */
+    pattern_getset,                     /* tp_getset */
 };
 
 static int _validate(PatternObject *self); /* Forward */


More information about the Python-bugs-list mailing list