[Python-checkins] cpython (2.7): Now re tests work again on Unicode-disabled build.

Sun Sep 14 16:43:26 CEST 2014

http://hg.python.org/cpython/rev/8f678db15869
changeset:   92422:8f678db15869
branch:      2.7
parent:      92419:fe8ba0ade9a3
user:        Serhiy Storchaka <storchaka at gmail.com>
date:        Sun Sep 14 17:40:44 2014 +0300
summary:
  Now re tests work again on Unicode-disabled build.
Simplified existing detections of Unicode-disabled build.

files:
  Lib/test/re_tests.py |   2 +-
  Lib/test/test_re.py  |  99 ++++++++++++++++---------------
  2 files changed, 53 insertions(+), 48 deletions(-)

diff --git a/Lib/test/re_tests.py b/Lib/test/re_tests.py
--- a/Lib/test/re_tests.py
+++ b/Lib/test/re_tests.py
@@ -663,7 +663,7 @@
 
 try:
     u = eval("u'\N{LATIN CAPITAL LETTER A WITH DIAERESIS}'")
-except SyntaxError:
+except (SyntaxError, ValueError):
     pass
 else:
     tests.extend([
diff --git a/Lib/test/test_re.py b/Lib/test/test_re.py
--- a/Lib/test/test_re.py
+++ b/Lib/test/test_re.py
@@ -1,6 +1,6 @@
 from test.test_support import verbose, run_unittest, import_module
 from test.test_support import precisionbigmemtest, _2G, cpython_only
-from test.test_support import captured_stdout
+from test.test_support import captured_stdout, have_unicode, requires_unicode, u
 import re
 from re import Scanner
 import sre_constants
@@ -86,6 +86,7 @@
         self.assertEqual(re.sub('\r\n', '\n', 'abc\r\ndef\r\n'),
                          'abc\ndef\n')
 
+    @requires_unicode
     def test_bug_1140(self):
         # re.sub(x, y, u'') should return u'', not '', and
         # re.sub(x, y, '') should return '', not u''.
@@ -376,10 +377,11 @@
                                    "abcd abc bcd bx", re.LOCALE).group(1), "bx")
         self.assertEqual(re.search(r"\B(b.)\B",
                                    "abc bcd bc abxd", re.LOCALE).group(1), "bx")
-        self.assertEqual(re.search(r"\b(b.)\b",
-                                   "abcd abc bcd bx", re.UNICODE).group(1), "bx")
-        self.assertEqual(re.search(r"\B(b.)\B",
-                                   "abc bcd bc abxd", re.UNICODE).group(1), "bx")
+        if have_unicode:
+            self.assertEqual(re.search(r"\b(b.)\b",
+                                       "abcd abc bcd bx", re.UNICODE).group(1), "bx")
+            self.assertEqual(re.search(r"\B(b.)\B",
+                                       "abc bcd bc abxd", re.UNICODE).group(1), "bx")
         self.assertEqual(re.search(r"^abc$", "\nabc\n", re.M).group(0), "abc")
         self.assertEqual(re.search(r"^\Aabc\Z$", "abc", re.M).group(0), "abc")
         self.assertIsNone(re.search(r"^\Aabc\Z$", "\nabc\n", re.M))
@@ -394,8 +396,9 @@
                                    "1aa! a").group(0), "1aa! a")
         self.assertEqual(re.search(r"\d\D\w\W\s\S",
                                    "1aa! a", re.LOCALE).group(0), "1aa! a")
-        self.assertEqual(re.search(r"\d\D\w\W\s\S",
-                                   "1aa! a", re.UNICODE).group(0), "1aa! a")
+        if have_unicode:
+            self.assertEqual(re.search(r"\d\D\w\W\s\S",
+                                       "1aa! a", re.UNICODE).group(0), "1aa! a")
 
     def test_string_boundaries(self):
         # See http://bugs.python.org/issue10713
@@ -423,13 +426,14 @@
         # Can match around the whitespace.
         self.assertEqual(len(re.findall(r"\B", " ")), 2)
 
+    @requires_unicode
     def test_bigcharset(self):
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222").group(1), u"\u2222")
-        self.assertEqual(re.match(u"([\u2222\u2223])",
-                                  u"\u2222", re.UNICODE).group(1), u"\u2222")
+        self.assertEqual(re.match(u(r"([\u2222\u2223])"),
+                                  unichr(0x2222)).group(1), unichr(0x2222))
+        self.assertEqual(re.match(u(r"([\u2222\u2223])"),
+                                  unichr(0x2222), re.UNICODE).group(1), unichr(0x2222))
         r = u'[%s]' % u''.join(map(unichr, range(256, 2**16, 255)))
-        self.assertEqual(re.match(r, u"\uff01", re.UNICODE).group(), u"\uff01")
+        self.assertEqual(re.match(r, unichr(0xff01), re.UNICODE).group(), unichr(0xff01))
 
     def test_big_codesize(self):
         # Issue #1160
@@ -476,7 +480,8 @@
         import _sre
         self.assertEqual(_sre.getlower(ord('A'), 0), ord('a'))
         self.assertEqual(_sre.getlower(ord('A'), re.LOCALE), ord('a'))
-        self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
+        if have_unicode:
+            self.assertEqual(_sre.getlower(ord('A'), re.UNICODE), ord('a'))
 
         self.assertEqual(re.match("abc", "ABC", re.I).group(0), "ABC")
         self.assertEqual(re.match("abc", u"ABC", re.I).group(0), "ABC")
@@ -503,8 +508,9 @@
         self.assertEqual(m.group(), match)
         self.assertEqual(m.span(), span)
 
+    @requires_unicode
     def test_re_escape(self):
-        alnum_chars = string.ascii_letters + string.digits
+        alnum_chars = unicode(string.ascii_letters + string.digits)
         p = u''.join(unichr(i) for i in range(256))
         for c in p:
             if c in alnum_chars:
@@ -517,7 +523,7 @@
         self.assertMatch(re.escape(p), p)
 
     def test_re_escape_byte(self):
-        alnum_chars = (string.ascii_letters + string.digits).encode('ascii')
+        alnum_chars = string.ascii_letters + string.digits
         p = ''.join(chr(i) for i in range(256))
         for b in p:
             if b in alnum_chars:
@@ -529,20 +535,21 @@
             self.assertMatch(re.escape(b), b)
         self.assertMatch(re.escape(p), p)
 
+    @requires_unicode
     def test_re_escape_non_ascii(self):
-        s = u'xxx\u2620\u2620\u2620xxx'
+        s = u(r'xxx\u2620\u2620\u2620xxx')
         s_escaped = re.escape(s)
-        self.assertEqual(s_escaped, u'xxx\\\u2620\\\u2620\\\u2620xxx')
+        self.assertEqual(s_escaped, u(r'xxx\\\u2620\\\u2620\\\u2620xxx'))
         self.assertMatch(s_escaped, s)
-        self.assertMatch(u'.%s+.' % re.escape(u'\u2620'), s,
-                         u'x\u2620\u2620\u2620x', (2, 7), re.search)
+        self.assertMatch(u'.%s+.' % re.escape(unichr(0x2620)), s,
+                         u(r'x\u2620\u2620\u2620x'), (2, 7), re.search)
 
     def test_re_escape_non_ascii_bytes(self):
-        b = u'y\u2620y\u2620y'.encode('utf-8')
+        b = b'y\xe2\x98\xa0y\xe2\x98\xa0y'
         b_escaped = re.escape(b)
         self.assertEqual(b_escaped, b'y\\\xe2\\\x98\\\xa0y\\\xe2\\\x98\\\xa0y')
         self.assertMatch(b_escaped, b)
-        res = re.findall(re.escape(u'\u2620'.encode('utf-8')), b)
+        res = re.findall(re.escape(b'\xe2\x98\xa0'), b)
         self.assertEqual(len(res), 2)
 
     def test_pickling(self):
@@ -621,8 +628,9 @@
         # non-recursive scheme was implemented.
         self.assertEqual(re.search('(a|b)*?c', 10000*'ab'+'cd').end(0), 20001)
 
+    @requires_unicode
     def test_bug_612074(self):
-        pat=u"["+re.escape(u"\u2039")+u"]"
+        pat=u"["+re.escape(unichr(0x2039))+u"]"
         self.assertEqual(re.compile(pat) and 1, 1)
 
     def test_stack_overflow(self):
@@ -696,12 +704,9 @@
         self.assertEqual(re.match('(a)((?!(b)*))*', 'abb').groups(),
                          ('a', None, None))
 
+    @requires_unicode
     def test_bug_764548(self):
         # bug 764548, re.compile() barfs on str/unicode subclasses
-        try:
-            unicode
-        except NameError:
-            self.skipTest('no problem if we have no unicode')
         class my_unicode(unicode): pass
         pat = re.compile(my_unicode("abc"))
         self.assertIsNone(pat.match("xyz"))
@@ -711,20 +716,14 @@
         self.assertEqual([item.group(0) for item in iter],
                          [":", "::", ":::"])
 
+    @requires_unicode
     def test_bug_926075(self):
-        try:
-            unicode
-        except NameError:
-            self.skipTest('no problem if we have no unicode')
         self.assertIsNot(re.compile('bug_926075'),
-                         re.compile(eval("u'bug_926075'")))
+                         re.compile(u'bug_926075'))
 
+    @requires_unicode
     def test_bug_931848(self):
-        try:
-            unicode
-        except NameError:
-            self.skipTest('no problem if we have no unicode')
-        pattern = eval('u"[\u002E\u3002\uFF0E\uFF61]"')
+        pattern = u(r"[\u002E\u3002\uFF0E\uFF61]")
         self.assertEqual(re.compile(pattern).split("a.b.c"),
                          ['a','b','c'])
 
@@ -743,23 +742,24 @@
         self.assertEqual(iter.next().span(), (4, 4))
         self.assertRaises(StopIteration, iter.next)
 
+    @requires_unicode
     def test_bug_6561(self):
         # '\d' should match characters in Unicode category 'Nd'
         # (Number, Decimal Digit), but not those in 'Nl' (Number,
         # Letter) or 'No' (Number, Other).
         decimal_digits = [
-            u'\u0037', # '\N{DIGIT SEVEN}', category 'Nd'
-            u'\u0e58', # '\N{THAI DIGIT SIX}', category 'Nd'
-            u'\uff10', # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
+            unichr(0x0037), # '\N{DIGIT SEVEN}', category 'Nd'
+            unichr(0x0e58), # '\N{THAI DIGIT SIX}', category 'Nd'
+            unichr(0xff10), # '\N{FULLWIDTH DIGIT ZERO}', category 'Nd'
             ]
         for x in decimal_digits:
             self.assertEqual(re.match('^\d$', x, re.UNICODE).group(0), x)
 
         not_decimal_digits = [
-            u'\u2165', # '\N{ROMAN NUMERAL SIX}', category 'Nl'
-            u'\u3039', # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
-            u'\u2082', # '\N{SUBSCRIPT TWO}', category 'No'
-            u'\u32b4', # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
+            unichr(0x2165), # '\N{ROMAN NUMERAL SIX}', category 'Nl'
+            unichr(0x3039), # '\N{HANGZHOU NUMERAL TWENTY}', category 'Nl'
+            unichr(0x2082), # '\N{SUBSCRIPT TWO}', category 'No'
+            unichr(0x32b4), # '\N{CIRCLED NUMBER THIRTY NINE}', category 'No'
             ]
         for x in not_decimal_digits:
             self.assertIsNone(re.match('^\d$', x, re.UNICODE))
@@ -767,11 +767,15 @@
     def test_empty_array(self):
         # SF buf 1647541
         import array
-        for typecode in 'cbBuhHiIlLfd':
+        typecodes = 'cbBhHiIlLfd'
+        if have_unicode:
+            typecodes += 'u'
+        for typecode in typecodes:
             a = array.array(typecode)
             self.assertIsNone(re.compile("bla").match(a))
             self.assertEqual(re.compile("").match(a).groups(), ())
 
+    @requires_unicode
     def test_inline_flags(self):
         # Bug #1700
         upper_char = unichr(0x1ea0) # Latin Capital Letter A with Dot Bellow
@@ -906,9 +910,10 @@
                 pattern = '.' + reps + mod + 'yz'
                 self.assertEqual(re.compile(pattern, re.S).findall('xyz'),
                                  ['xyz'], msg=pattern)
-                pattern = pattern.encode()
-                self.assertEqual(re.compile(pattern, re.S).findall(b'xyz'),
-                                 [b'xyz'], msg=pattern)
+                if have_unicode:
+                    pattern = unicode(pattern)
+                    self.assertEqual(re.compile(pattern, re.S).findall(u'xyz'),
+                                     [u'xyz'], msg=pattern)
 
 
     def test_bug_2537(self):

-- 
Repository URL: http://hg.python.org/cpython