[Python-checkins] cpython: Issue #13592: Improved the repr for regular expression pattern objects.

serhiy.storchaka python-checkins at python.org
Sat Nov 23 21:45:27 CET 2013


http://hg.python.org/cpython/rev/8c00677da6c0
changeset:   87458:8c00677da6c0
parent:      87456:c3fd79b17983
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sat Nov 23 22:42:43 2013 +0200
summary:
  Issue #13592: Improved the repr for regular expression pattern objects.
Based on patch by Hugo Lopes Tavares.

files:
  Lib/sre_constants.py    |   2 +
  Lib/test/test_re.py     |  62 +++++++++++++++++++++
  Misc/NEWS               |   3 +
  Modules/_sre.c          |  82 ++++++++++++++++++++++++++++-
  Modules/sre_constants.h |   2 +
  5 files changed, 150 insertions(+), 1 deletions(-)


diff --git a/Lib/sre_constants.py b/Lib/sre_constants.py
--- a/Lib/sre_constants.py
+++ b/Lib/sre_constants.py
@@ -250,6 +250,8 @@
     f.write("#define SRE_FLAG_DOTALL %d\n" % SRE_FLAG_DOTALL)
     f.write("#define SRE_FLAG_UNICODE %d\n" % SRE_FLAG_UNICODE)
     f.write("#define SRE_FLAG_VERBOSE %d\n" % SRE_FLAG_VERBOSE)
+    f.write("#define SRE_FLAG_DEBUG %d\n" % SRE_FLAG_DEBUG)
+    f.write("#define SRE_FLAG_ASCII %d\n" % SRE_FLAG_ASCII)
 
     f.write("#define SRE_INFO_PREFIX %d\n" % SRE_INFO_PREFIX)
     f.write("#define SRE_INFO_LITERAL %d\n" % SRE_INFO_LITERAL)
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1164,6 +1164,68 @@
                 self.assertEqual(m.group(2), "y")
 
 
+class PatternReprTests(unittest.TestCase):
+    def check(self, pattern, expected):
+        self.assertEqual(repr(re.compile(pattern)), expected)
+
+    def check_flags(self, pattern, flags, expected):
+        self.assertEqual(repr(re.compile(pattern, flags)), expected)
+
+    def test_without_flags(self):
+        self.check('random pattern',
+                   "re.compile('random pattern')")
+
+    def test_single_flag(self):
+        self.check_flags('random pattern', re.IGNORECASE,
+            "re.compile('random pattern', re.IGNORECASE)")
+
+    def test_multiple_flags(self):
+        self.check_flags('random pattern', re.I|re.S|re.X,
+            "re.compile('random pattern', "
+            "re.IGNORECASE|re.DOTALL|re.VERBOSE)")
+
+    def test_unicode_flag(self):
+        self.check_flags('random pattern', re.U,
+                         "re.compile('random pattern')")
+        self.check_flags('random pattern', re.I|re.S|re.U,
+                         "re.compile('random pattern', "
+                         "re.IGNORECASE|re.DOTALL)")
+
+    def test_inline_flags(self):
+        self.check('(?i)pattern',
+                   "re.compile('(?i)pattern', re.IGNORECASE)")
+
+    def test_unknown_flags(self):
+        self.check_flags('random pattern', 0x123000,
+                         "re.compile('random pattern', 0x123000)")
+        self.check_flags('random pattern', 0x123000|re.I,
+            "re.compile('random pattern', re.IGNORECASE|0x123000)")
+
+    def test_bytes(self):
+        self.check(b'bytes pattern',
+                   "re.compile(b'bytes pattern')")
+        self.check_flags(b'bytes pattern', re.A,
+                         "re.compile(b'bytes pattern', re.ASCII)")
+
+    def test_quotes(self):
+        self.check('random "double quoted" pattern',
+            '''re.compile('random "double quoted" pattern')''')
+        self.check("random 'single quoted' pattern",
+            '''re.compile("random 'single quoted' pattern")''')
+        self.check('''both 'single' and "double" quotes''',
+            '''re.compile('both \\'single\\' and "double" quotes')''')
+
+    def test_long_pattern(self):
+        pattern = 'Very %spattern' % ('long ' * 1000)
+        r = repr(re.compile(pattern))
+        self.assertLess(len(r), 300)
+        self.assertEqual(r[:30], "re.compile('Very long long lon")
+        r = repr(re.compile(pattern, re.I))
+        self.assertLess(len(r), 300)
+        self.assertEqual(r[:30], "re.compile('Very long long lon")
+        self.assertEqual(r[-16:], ", re.IGNORECASE)")
+
+
 class ImplementationTest(unittest.TestCase):
     """
     Test implementation details of the re module.
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -68,6 +68,9 @@
 Library
 -------
 
+- Issue #13592: Improved the repr for regular expression pattern objects.
+  Based on patch by Hugo Lopes Tavares.
+
 - Issue #19641: Added the audioop.byteswap() function to convert big-endian
   samples to little-endian and vice versa.
 
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1139,6 +1139,86 @@
 #endif
 }
 
+static PyObject *
+pattern_repr(PatternObject *obj)
+{
+    static const struct {
+        const char *name;
+        int value;
+    } flag_names[] = {
+        {"re.TEMPLATE", SRE_FLAG_TEMPLATE},
+        {"re.IGNORECASE", SRE_FLAG_IGNORECASE},
+        {"re.LOCALE", SRE_FLAG_LOCALE},
+        {"re.MULTILINE", SRE_FLAG_MULTILINE},
+        {"re.DOTALL", SRE_FLAG_DOTALL},
+        {"re.UNICODE", SRE_FLAG_UNICODE},
+        {"re.VERBOSE", SRE_FLAG_VERBOSE},
+        {"re.DEBUG", SRE_FLAG_DEBUG},
+        {"re.ASCII", SRE_FLAG_ASCII},
+    };
+    PyObject *result = NULL;
+    PyObject *flag_items;
+    int i;
+    int flags = obj->flags;
+
+    /* Omit re.UNICODE for valid string patterns. */
+    if (obj->isbytes == 0 &&
+        (flags & (SRE_FLAG_LOCALE|SRE_FLAG_UNICODE|SRE_FLAG_ASCII)) ==
+         SRE_FLAG_UNICODE)
+        flags &= ~SRE_FLAG_UNICODE;
+
+    flag_items = PyList_New(0);
+    if (!flag_items)
+        return NULL;
+
+    for (i = 0; i < Py_ARRAY_LENGTH(flag_names); i++) {
+        if (flags & flag_names[i].value) {
+            PyObject *item = PyUnicode_FromString(flag_names[i].name);
+            if (!item)
+                goto done;
+
+            if (PyList_Append(flag_items, item) < 0) {
+                Py_DECREF(item);
+                goto done;
+            }
+            Py_DECREF(item);
+            flags &= ~flag_names[i].value;
+        }
+    }
+    if (flags) {
+        PyObject *item = PyUnicode_FromFormat("0x%x", flags);
+        if (!item)
+            goto done;
+
+        if (PyList_Append(flag_items, item) < 0) {
+            Py_DECREF(item);
+            goto done;
+        }
+        Py_DECREF(item);
+    }
+
+    if (PyList_Size(flag_items) > 0) {
+        PyObject *flags_result;
+        PyObject *sep = PyUnicode_FromString("|");
+        if (!sep)
+            goto done;
+        flags_result = PyUnicode_Join(sep, flag_items);
+        Py_DECREF(sep);
+        if (!flags_result)
+            goto done;
+        result = PyUnicode_FromFormat("re.compile(%.200R, %S)",
+                                      obj->pattern, flags_result);
+        Py_DECREF(flags_result);
+    }
+    else {
+        result = PyUnicode_FromFormat("re.compile(%.200R)", obj->pattern);
+    }
+
+done:
+    Py_DECREF(flag_items);
+    return result;
+}
+
 PyDoc_STRVAR(pattern_match_doc,
 "match(string[, pos[, endpos]]) -> match object or None.\n\
     Matches zero or more characters at the beginning of the string");
@@ -1214,7 +1294,7 @@
     0,                                  /* tp_getattr */
     0,                                  /* tp_setattr */
     0,                                  /* tp_reserved */
-    0,                                  /* tp_repr */
+    (reprfunc)pattern_repr,             /* tp_repr */
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
diff --git a/Modules/sre_constants.h b/Modules/sre_constants.h
--- a/Modules/sre_constants.h
+++ b/Modules/sre_constants.h
@@ -81,6 +81,8 @@
 #define SRE_FLAG_DOTALL 16
 #define SRE_FLAG_UNICODE 32
 #define SRE_FLAG_VERBOSE 64
+#define SRE_FLAG_DEBUG 128
+#define SRE_FLAG_ASCII 256
 #define SRE_INFO_PREFIX 1
 #define SRE_INFO_LITERAL 2
 #define SRE_INFO_CHARSET 4

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list