[Python-checkins] cpython (3.6): Implement rich comparison for _sre.SRE_Pattern

victor.stinner python-checkins at python.org
Mon Nov 21 10:39:07 EST 2016


https://hg.python.org/cpython/rev/5e8ef1493843
changeset:   105281:5e8ef1493843
branch:      3.6
parent:      105279:a429d29eafbf
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Mon Nov 21 16:35:08 2016 +0100
summary:
  Implement rich comparison for _sre.SRE_Pattern

Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created by
re.compile(), become comparable (only x==y and x!=y operators). This change
should fix issue #18383: don't duplicate warning filters when the warnings
module is reloaded (something usually done only in unit tests).

files:
  Lib/test/test_re.py |  47 ++++++++++++++++++++-
  Misc/NEWS           |   7 ++-
  Modules/_sre.c      |  73 ++++++++++++++++++++++++++++++--
  3 files changed, 118 insertions(+), 9 deletions(-)


diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -3,12 +3,13 @@
 import io
 import locale
 import re
-from re import Scanner
 import sre_compile
+import string
 import sys
-import string
 import traceback
 import unittest
+import warnings
+from re import Scanner
 from weakref import proxy
 
 # Misc tests from Tim Peters' re.doc
@@ -1777,6 +1778,48 @@
         self.assertIn('ASCII', str(re.A))
         self.assertIn('DOTALL', str(re.S))
 
+    def test_pattern_compare(self):
+        pattern1 = re.compile('abc', re.IGNORECASE)
+
+        # equal: same pattern and flags, compiled again after re.purge()
+        re.purge()
+        pattern2 = re.compile('abc', re.IGNORECASE)
+        self.assertEqual(hash(pattern2), hash(pattern1))
+        self.assertEqual(pattern2, pattern1)
+
+        # not equal: different pattern, same flags
+        re.purge()
+        pattern3 = re.compile('XYZ', re.IGNORECASE)
+        # Don't test hash(pattern3) != hash(pattern1) because there is no
+        # guarantee that hash values are different
+        self.assertNotEqual(pattern3, pattern1)
+
+        # not equal: same pattern, different flags (flags=0)
+        re.purge()
+        pattern4 = re.compile('abc')
+        self.assertNotEqual(pattern4, pattern1)
+
+        # only == and != comparison operators are supported
+        with self.assertRaises(TypeError):
+            pattern1 < pattern2
+
+    def test_pattern_compare_bytes(self):
+        pattern1 = re.compile(b'abc')
+
+        # equal: same bytes pattern, compiled again after re.purge()
+        re.purge()
+        pattern2 = re.compile(b'abc')
+        self.assertEqual(hash(pattern2), hash(pattern1))
+        self.assertEqual(pattern2, pattern1)
+
+        # not equal: patterns of different types (str vs bytes);
+        # the comparison must not raise a BytesWarning
+        re.purge()
+        pattern3 = re.compile('abc')
+        with warnings.catch_warnings():
+            warnings.simplefilter('error', BytesWarning)
+            self.assertNotEqual(pattern3, pattern1)
+
 
 class PatternReprTests(unittest.TestCase):
     def check(self, pattern, expected):
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -42,6 +42,11 @@
 Library
 -------
 
+- Issue #28727: Regular expression patterns, _sre.SRE_Pattern objects created
+  by re.compile(), become comparable (only x==y and x!=y operators). This
+  change should fix issue #18383: don't duplicate warning filters when the
+  warnings module is reloaded (something usually done only in unit tests).
+
 - Issue #20572: The subprocess.Popen.wait method's undocumented
   endtime parameter now raises a DeprecationWarning.
 
@@ -77,7 +82,7 @@
 
 - Issue #28703: Fix asyncio.iscoroutinefunction to handle Mock objects.
 
-- Issue #28704: Fix create_unix_server to support Path-like objects 
+- Issue #28704: Fix create_unix_server to support Path-like objects
   (PEP 519).
 
 - Issue #28720: Add collections.abc.AsyncGenerator.
diff --git a/Modules/_sre.c b/Modules/_sre.c
--- a/Modules/_sre.c
+++ b/Modules/_sre.c
@@ -1506,14 +1506,12 @@
 
     self->groups = groups;
 
-    Py_XINCREF(groupindex);
+    Py_INCREF(groupindex);
     self->groupindex = groupindex;
 
-    Py_XINCREF(indexgroup);
+    Py_INCREF(indexgroup);
     self->indexgroup = indexgroup;
 
-    self->weakreflist = NULL;
-
     if (!_validate(self)) {
         Py_DECREF(self);
         return NULL;
@@ -2649,6 +2647,69 @@
     return (PyObject*) scanner;
 }
 
+/* tp_hash slot: XOR the hash of the pattern object with a hash of the code
+   array, flags, isbytes and codesize. Returns -1 if PyObject_Hash() failed. */
+static Py_hash_t
+pattern_hash(PatternObject *self)
+{
+    Py_hash_t result = PyObject_Hash(self->pattern);
+    Py_hash_t code_hash;
+
+    if (result == -1) {
+        return -1;
+    }
+
+    code_hash = _Py_HashBytes(self->code,
+                              sizeof(self->code[0]) * self->codesize);
+    result ^= code_hash;
+    result ^= self->flags;
+    result ^= self->isbytes;
+    result ^= self->codesize;
+
+    /* -1 is the error return value above, so never use it as a hash. */
+    return (result == -1) ? -2 : result;
+}
+
+/* tp_richcompare slot: only == and != between two patterns are supported,
+   anything else yields NotImplemented. Returns NULL on comparison error. */
+static PyObject*
+pattern_richcompare(PyObject *lefto, PyObject *righto, int op)
+{
+    PatternObject *left, *right;
+    int cmp;
+
+    if (op != Py_EQ && op != Py_NE) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+    if (Py_TYPE(lefto) != &Pattern_Type || Py_TYPE(righto) != &Pattern_Type) {
+        Py_RETURN_NOTIMPLEMENTED;
+    }
+    left = (PatternObject *)lefto;
+    right = (PatternObject *)righto;
+    /* Sizes must match: memcmp() reads left->codesize items from both. */
+    cmp = (left->flags == right->flags
+           && left->isbytes == right->isbytes
+           && left->codesize == right->codesize);
+    if (cmp) {
+        /* Compare the code and the pattern because the same pattern can
+           produce different codes depending on the locale used to compile the
+           pattern when the re.LOCALE flag is used. Don't compare groups,
+           indexgroup nor groupindex: they are derived from the pattern. */
+        cmp = (memcmp(left->code, right->code,
+                      sizeof(left->code[0]) * left->codesize) == 0);
+    }
+    if (cmp) {
+        cmp = PyObject_RichCompareBool(left->pattern, right->pattern, Py_EQ);
+        if (cmp < 0) {
+            return NULL;
+        }
+    }
+    if (op == Py_NE) {
+        cmp = !cmp;
+    }
+    return PyBool_FromLong(cmp);
+}
+
 #include "clinic/_sre.c.h"
 
 static PyMethodDef pattern_methods[] = {
@@ -2693,7 +2754,7 @@
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
-    0,                                  /* tp_hash */
+    (hashfunc)pattern_hash,             /* tp_hash */
     0,                                  /* tp_call */
     0,                                  /* tp_str */
     0,                                  /* tp_getattro */
@@ -2703,7 +2764,7 @@
     pattern_doc,                        /* tp_doc */
     0,                                  /* tp_traverse */
     0,                                  /* tp_clear */
-    0,                                  /* tp_richcompare */
+    pattern_richcompare,                /* tp_richcompare */
     offsetof(PatternObject, weakreflist),       /* tp_weaklistoffset */
     0,                                  /* tp_iter */
     0,                                  /* tp_iternext */

-- 
Repository URL: https://hg.python.org/cpython


More information about the Python-checkins mailing list