[Numpy-svn] r8151 - in trunk/numpy/core: . tests

Sat Feb 20 13:09:17 EST 2010

Author: ptvirtan
Date: 2010-02-20 12:09:17 -0600 (Sat, 20 Feb 2010)
New Revision: 8151

Modified:
   trunk/numpy/core/defchararray.py
   trunk/numpy/core/tests/test_defchararray.py
Log:
3K: ENH: make defchararray work on Py3, and make its tests Py3 compliant

There were mainly issues in mixing bytes/unicode on Py3.

Modified: trunk/numpy/core/defchararray.py
===================================================================

--- trunk/numpy/core/defchararray.py	2010-02-20 18:09:00 UTC (rev 8150)
+++ trunk/numpy/core/defchararray.py	2010-02-20 18:09:17 UTC (rev 8151)
@@ -21,6 +21,7 @@
 from numeric import ndarray, compare_chararrays
 from numeric import array as narray
 from numpy.core.multiarray import _vec_string
+from numpy.compat import asbytes
 import numpy
 
 __all__ = ['chararray',
@@ -397,6 +398,8 @@
         a_arr = numpy.asarray(a)
         width_arr = numpy.asarray(width)
         size = long(numpy.max(width_arr.flat))
+        if numpy.issubdtype(a_arr.dtype, numpy.string_):
+            fillchar = asbytes(fillchar)
         return _vec_string(
             a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
 else:
@@ -917,6 +920,8 @@
         a_arr = numpy.asarray(a)
         width_arr = numpy.asarray(width)
         size = long(numpy.max(width_arr.flat))
+        if numpy.issubdtype(a_arr.dtype, numpy.string_):
+            fillchar = asbytes(fillchar)
         return _vec_string(
             a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
 else:
@@ -1183,6 +1188,8 @@
         a_arr = numpy.asarray(a)
         width_arr = numpy.asarray(width)
         size = long(numpy.max(width_arr.flat))
+        if numpy.issubdtype(a_arr.dtype, numpy.string_):
+            fillchar = asbytes(fillchar)
         return _vec_string(
             a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
 else:
@@ -1841,6 +1848,13 @@
         # strings in the new array.
         itemsize = long(itemsize)
 
+        if sys.version_info[0] >= 3 and isinstance(buffer, _unicode):
+            # On Py3, unicode objects do not have the buffer interface
+            filler = buffer
+            buffer = None
+        else:
+            filler = None
+
         _globalvar = 1
         if buffer is None:
             self = ndarray.__new__(subtype, shape, (dtype, itemsize),
@@ -1850,6 +1864,8 @@
                                    buffer=buffer,
                                    offset=offset, strides=strides,
                                    order=order)
+        if filler is not None:
+            self[...] = filler
         _globalvar = 0
         return self
 
@@ -2594,7 +2610,7 @@
 
         if itemsize is None:
             itemsize = _len(obj)
-        shape = _len(obj) / itemsize
+        shape = _len(obj) // itemsize
 
         if unicode:
             if sys.maxunicode == 0xffff:

Modified: trunk/numpy/core/tests/test_defchararray.py
===================================================================
--- trunk/numpy/core/tests/test_defchararray.py	2010-02-20 18:09:00 UTC (rev 8150)
+++ trunk/numpy/core/tests/test_defchararray.py	2010-02-20 18:09:17 UTC (rev 8151)
@@ -4,7 +4,9 @@
 import sys
 from numpy.core.multiarray import _vec_string
 
-kw_unicode_true = {'unicode': True}
+from numpy.compat import asbytes, asbytes_nested
+
+kw_unicode_true = {'unicode': True} # make 2to3 work properly
 kw_unicode_false = {'unicode': False}
 
 class TestBasic(TestCase):
@@ -13,7 +15,8 @@
                       ['long   ', '0123456789']], dtype='O')
         B = np.char.array(A)
         assert_equal(B.dtype.itemsize, 10)
-        assert_array_equal(B, [['abc', '2'], ['long', '0123456789']])
+        assert_array_equal(B, asbytes_nested([['abc', '2'],
+                                              ['long', '0123456789']]))
 
     def test_from_object_array_unicode(self):
         A = np.array([['abc', u'Sigma \u03a3'],
@@ -21,11 +24,12 @@
         self.failUnlessRaises(ValueError, np.char.array, (A,))
         B = np.char.array(A, **kw_unicode_true)
         assert_equal(B.dtype.itemsize, 10 * np.array('a', 'U').dtype.itemsize)
-        assert_array_equal(B, [['abc', u'Sigma \u03a3'], ['long', '0123456789']])
+        assert_array_equal(B, [['abc', u'Sigma \u03a3'],
+                               ['long', '0123456789']])
 
     def test_from_string_array(self):
-        A = np.array([['abc', 'foo'],
-                      ['long   ', '0123456789']])
+        A = np.array(asbytes_nested([['abc', 'foo'],
+                                     ['long   ', '0123456789']]))
         assert_equal(A.dtype.type, np.string_)
         B = np.char.array(A)
         assert_array_equal(B, A)
@@ -62,7 +66,7 @@
         assert issubclass((A + B).dtype.type, np.unicode_)
 
     def test_from_string(self):
-        A = np.char.array('abc')
+        A = np.char.array(asbytes('abc'))
         assert_equal(len(A), 1)
         assert_equal(len(A[0]), 3)
         assert issubclass(A.dtype.type, np.string_)
@@ -132,7 +136,7 @@
 
     def test_it(self):
         assert_equal(self.A.shape, (4,))
-        assert_equal(self.A.upper()[:2].tostring(), 'AB')
+        assert_equal(self.A.upper()[:2].tostring(), asbytes('AB'))
 
 class TestComparisons(TestCase):
     def setUp(self):
@@ -277,17 +281,18 @@
     def setUp(self):
         self.A = np.array([[' abc ', ''],
                            ['12345', 'MixedCase'],
-                           ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray)
+                           ['123 \t 345 \0 ', 'UPPER']],
+                          dtype='S').view(np.chararray)
         self.B = np.array([[u' \u03a3 ', u''],
                            [u'12345', u'MixedCase'],
                            [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray)
 
     def test_capitalize(self):
         assert issubclass(self.A.capitalize().dtype.type, np.string_)
-        assert_array_equal(self.A.capitalize(), [
+        assert_array_equal(self.A.capitalize(), asbytes_nested([
                 [' abc ', ''],
                 ['12345', 'Mixedcase'],
-                ['123 \t 345 \0 ', 'Upper']])
+                ['123 \t 345 \0 ', 'Upper']]))
         assert issubclass(self.B.capitalize().dtype.type, np.unicode_)
         assert_array_equal(self.B.capitalize(), [
                 [u' \u03c3 ', ''],
@@ -299,56 +304,71 @@
         widths = np.array([[10, 20]])
         C = self.A.center([10, 20])
         assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
-        C = self.A.center(20, '#')
-        assert np.all(C.startswith('#'))
-        assert np.all(C.endswith('#'))
-        C = np.char.center('FOO', [[10, 20], [15, 8]])
+        C = self.A.center(20, asbytes('#'))
+        assert np.all(C.startswith(asbytes('#')))
+        assert np.all(C.endswith(asbytes('#')))
+        C = np.char.center(asbytes('FOO'), [[10, 20], [15, 8]])
         assert issubclass(C.dtype.type, np.string_)
-        assert_array_equal(C, [
+        assert_array_equal(C, asbytes_nested([
                 ['   FOO    ', '        FOO         '],
-                ['      FOO      ', '  FOO   ']])
+                ['      FOO      ', '  FOO   ']]))
 
     def test_decode(self):
-        A = np.char.array(['736563726574206d657373616765'])
-        assert A.decode('hex_codec')[0] == 'secret message'
+        if sys.version_info[0] >= 3:
+            A = np.char.array([asbytes('\\u03a3')])
+            assert A.decode('unicode-escape')[0] == '\u03a3'
+        else:
+            A = np.char.array(['736563726574206d657373616765'])
+            assert A.decode('hex_codec')[0] == 'secret message'
 
     def test_encode(self):
         B = self.B.encode('unicode_escape')
-        assert B[0][0] == r' \u03a3 '
+        assert B[0][0] == asbytes(r' \u03a3 ')
 
     def test_expandtabs(self):
         T = self.A.expandtabs()
-        assert T[2][0] == '123      345'
+        assert T[2][0] == asbytes('123      345')
 
     def test_join(self):
-        A = np.char.join([',', '#'], self.A)
-        assert issubclass(A.dtype.type, np.string_)
-        assert_array_equal(np.char.join([',', '#'], self.A), [
-                [' ,a,b,c, ', ''],
-                ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
-                ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
+        if sys.version_info[0] >= 3:
+            # NOTE: list(b'123') == [49, 50, 51]
+            #       so that b','.join(b'123') results in an error on Py3
+            A0 = self.A.decode('ascii')
+        else:
+            A0 = self.A
 
+        A = np.char.join([',', '#'], A0)
+        if sys.version_info[0] >= 3:
+            assert issubclass(A.dtype.type, np.unicode_)
+        else:
+            assert issubclass(A.dtype.type, np.string_)
+        assert_array_equal(np.char.join([',', '#'], A0),
+                           [
+            [' ,a,b,c, ', ''],
+            ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
+            ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
+
     def test_ljust(self):
         assert issubclass(self.A.ljust(10).dtype.type, np.string_)
         widths = np.array([[10, 20]])
         C = self.A.ljust([10, 20])
         assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
-        C = self.A.ljust(20, '#')
-        assert_array_equal(C.startswith('#'), [
+        C = self.A.ljust(20, asbytes('#'))
+        assert_array_equal(C.startswith(asbytes('#')), [
                 [False, True], [False, False], [False, False]])
-        assert np.all(C.endswith('#'))
-        C = np.char.ljust('FOO', [[10, 20], [15, 8]])
+        assert np.all(C.endswith(asbytes('#')))
+        C = np.char.ljust(asbytes('FOO'), [[10, 20], [15, 8]])
         assert issubclass(C.dtype.type, np.string_)
-        assert_array_equal(C, [
+        assert_array_equal(C, asbytes_nested([
                 ['FOO       ', 'FOO                 '],
-                ['FOO            ', 'FOO     ']])
+                ['FOO            ', 'FOO     ']]))
 
     def test_lower(self):
         assert issubclass(self.A.lower().dtype.type, np.string_)
-        assert_array_equal(self.A.lower(), [
+        assert_array_equal(self.A.lower(), asbytes_nested([
                 [' abc ', ''],
                 ['12345', 'mixedcase'],
-                ['123 \t 345 \0 ', 'upper']])
+                ['123 \t 345 \0 ', 'upper']]))
         assert issubclass(self.B.lower().dtype.type, np.unicode_)
         assert_array_equal(self.B.lower(), [
                 [u' \u03c3 ', u''],
@@ -357,14 +377,15 @@
 
     def test_lstrip(self):
         assert issubclass(self.A.lstrip().dtype.type, np.string_)
-        assert_array_equal(self.A.lstrip(), [
+        assert_array_equal(self.A.lstrip(), asbytes_nested([
                 ['abc ', ''],
                 ['12345', 'MixedCase'],
-                ['123 \t 345 \0 ', 'UPPER']])
-        assert_array_equal(self.A.lstrip(['1', 'M']), [
+                ['123 \t 345 \0 ', 'UPPER']]))
+        assert_array_equal(self.A.lstrip(asbytes_nested(['1', 'M'])),
+                           asbytes_nested([
                 [' abc', ''],
                 ['2345', 'ixedCase'],
-                ['23 \t 345 \x00', 'UPPER']])
+                ['23 \t 345 \x00', 'UPPER']]))
         assert issubclass(self.B.lstrip().dtype.type, np.unicode_)
         assert_array_equal(self.B.lstrip(), [
                 [u'\u03a3 ', ''],
@@ -373,68 +394,74 @@
 
     def test_partition(self):
         if sys.version_info >= (2, 5):
-            P = self.A.partition(['3', 'M'])
+            P = self.A.partition(asbytes_nested(['3', 'M']))
             assert issubclass(P.dtype.type, np.string_)
-            assert_array_equal(P, [
+            assert_array_equal(P, asbytes_nested([
                     [(' abc ', '', ''), ('', '', '')],
                     [('12', '3', '45'), ('', 'M', 'ixedCase')],
-                    [('12', '3', ' \t 345 \0 '), ('UPPER', '', '')]])
+                    [('12', '3', ' \t 345 \0 '), ('UPPER', '', '')]]))
 
     def test_replace(self):
-        R = self.A.replace(['3', 'a'], ['##########', '@'])
+        R = self.A.replace(asbytes_nested(['3', 'a']),
+                           asbytes_nested(['##########', '@']))
         assert issubclass(R.dtype.type, np.string_)
-        assert_array_equal(R, [
+        assert_array_equal(R, asbytes_nested([
                 [' abc ', ''],
                 ['12##########45', 'MixedC@se'],
-                ['12########## \t ##########45 \x00', 'UPPER']])
-        R = self.A.replace('a', u'\u03a3')
-        assert issubclass(R.dtype.type, np.unicode_)
-        assert_array_equal(R, [
-                [u' \u03a3bc ', ''],
-                ['12345', u'MixedC\u03a3se'],
-                ['123 \t 345 \x00', 'UPPER']])
+                ['12########## \t ##########45 \x00', 'UPPER']]))
 
+        if sys.version_info[0] < 3:
+            # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
+            R = self.A.replace(asbytes('a'), u'\u03a3')
+            assert issubclass(R.dtype.type, np.unicode_)
+            assert_array_equal(R, [
+                    [u' \u03a3bc ', ''],
+                    ['12345', u'MixedC\u03a3se'],
+                    ['123 \t 345 \x00', 'UPPER']])
+
     def test_rjust(self):
         assert issubclass(self.A.rjust(10).dtype.type, np.string_)
         widths = np.array([[10, 20]])
         C = self.A.rjust([10, 20])
         assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
-        C = self.A.rjust(20, '#')
-        assert np.all(C.startswith('#'))
-        assert_array_equal(C.endswith('#'), [[False, True], [False, False], [False, False]])
-        C = np.char.rjust('FOO', [[10, 20], [15, 8]])
+        C = self.A.rjust(20, asbytes('#'))
+        assert np.all(C.startswith(asbytes('#')))
+        assert_array_equal(C.endswith(asbytes('#')),
+                           [[False, True], [False, False], [False, False]])
+        C = np.char.rjust(asbytes('FOO'), [[10, 20], [15, 8]])
         assert issubclass(C.dtype.type, np.string_)
-        assert_array_equal(C, [
+        assert_array_equal(C, asbytes_nested([
                 ['       FOO', '                 FOO'],
-                ['            FOO', '     FOO']])
+                ['            FOO', '     FOO']]))
 
     def test_rpartition(self):
         if sys.version_info >= (2, 5):
-            P = self.A.rpartition(['3', 'M'])
+            P = self.A.rpartition(asbytes_nested(['3', 'M']))
             assert issubclass(P.dtype.type, np.string_)
-            assert_array_equal(P, [
+            assert_array_equal(P, asbytes_nested([
                     [('', '', ' abc '), ('', '', '')],
                     [('12', '3', '45'), ('', 'M', 'ixedCase')],
-                    [('123 \t ', '3', '45 \0 '), ('', '', 'UPPER')]])
+                    [('123 \t ', '3', '45 \0 '), ('', '', 'UPPER')]]))
 
     def test_rsplit(self):
-        A = self.A.rsplit('3')
+        A = self.A.rsplit(asbytes('3'))
         assert issubclass(A.dtype.type, np.object_)
-        assert_equal(A.tolist(), [
+        assert_equal(A.tolist(), asbytes_nested([
                 [[' abc '], ['']],
                 [['12', '45'], ['MixedCase']],
-                [['12', ' \t ', '45 \x00 '], ['UPPER']]])
+                [['12', ' \t ', '45 \x00 '], ['UPPER']]]))
 
     def test_rstrip(self):
         assert issubclass(self.A.rstrip().dtype.type, np.string_)
-        assert_array_equal(self.A.rstrip(), [
+        assert_array_equal(self.A.rstrip(), asbytes_nested([
                 [' abc', ''],
                 ['12345', 'MixedCase'],
-                ['123 \t 345', 'UPPER']])
-        assert_array_equal(self.A.rstrip(['5', 'ER']), [
+                ['123 \t 345', 'UPPER']]))
+        assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])),
+                           asbytes_nested([
                 [' abc ', ''],
                 ['1234', 'MixedCase'],
-                ['123 \t 345 \x00', 'UPP']])
+                ['123 \t 345 \x00', 'UPP']]))
         assert issubclass(self.B.rstrip().dtype.type, np.unicode_)
         assert_array_equal(self.B.rstrip(), [
                 [u' \u03a3', ''],
@@ -443,14 +470,15 @@
 
     def test_strip(self):
         assert issubclass(self.A.strip().dtype.type, np.string_)
-        assert_array_equal(self.A.strip(), [
+        assert_array_equal(self.A.strip(), asbytes_nested([
                 ['abc', ''],
                 ['12345', 'MixedCase'],
-                ['123 \t 345', 'UPPER']])
-        assert_array_equal(self.A.strip(['15', 'EReM']), [
+                ['123 \t 345', 'UPPER']]))
+        assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])),
+                           asbytes_nested([
                 [' abc ', ''],
                 ['234', 'ixedCas'],
-                ['23 \t 345 \x00', 'UPP']])
+                ['23 \t 345 \x00', 'UPP']]))
         assert issubclass(self.B.strip().dtype.type, np.unicode_)
         assert_array_equal(self.B.strip(), [
                 [u'\u03a3', ''],
@@ -458,12 +486,12 @@
                 ['123 \t 345', 'UPPER']])
 
     def test_split(self):
-        A = self.A.split('3')
+        A = self.A.split(asbytes('3'))
         assert issubclass(A.dtype.type, np.object_)
-        assert_equal(A.tolist(), [
+        assert_equal(A.tolist(), asbytes_nested([
                 [[' abc '], ['']],
                 [['12', '45'], ['MixedCase']],
-                [['12', ' \t ', '45 \x00 '], ['UPPER']]])
+                [['12', ' \t ', '45 \x00 '], ['UPPER']]]))
 
     def test_splitlines(self):
         A = np.char.array(['abc\nfds\nwer']).splitlines()
@@ -473,10 +501,10 @@
 
     def test_swapcase(self):
         assert issubclass(self.A.swapcase().dtype.type, np.string_)
-        assert_array_equal(self.A.swapcase(), [
+        assert_array_equal(self.A.swapcase(), asbytes_nested([
                 [' ABC ', ''],
                 ['12345', 'mIXEDcASE'],
-                ['123 \t 345 \0 ', 'upper']])
+                ['123 \t 345 \0 ', 'upper']]))
         assert issubclass(self.B.swapcase().dtype.type, np.unicode_)
         assert_array_equal(self.B.swapcase(), [
                 [u' \u03c3 ', u''],
@@ -485,10 +513,10 @@
 
     def test_title(self):
         assert issubclass(self.A.title().dtype.type, np.string_)
-        assert_array_equal(self.A.title(), [
+        assert_array_equal(self.A.title(), asbytes_nested([
                 [' Abc ', ''],
                 ['12345', 'Mixedcase'],
-                ['123 \t 345 \0 ', 'Upper']])
+                ['123 \t 345 \0 ', 'Upper']]))
         assert issubclass(self.B.title().dtype.type, np.unicode_)
         assert_array_equal(self.B.title(), [
                 [u' \u03a3 ', u''],
@@ -497,10 +525,10 @@
 
     def test_upper(self):
         assert issubclass(self.A.upper().dtype.type, np.string_)
-        assert_array_equal(self.A.upper(), [
+        assert_array_equal(self.A.upper(), asbytes_nested([
                 [' ABC ', ''],
                 ['12345', 'MIXEDCASE'],
-                ['123 \t 345 \0 ', 'UPPER']])
+                ['123 \t 345 \0 ', 'UPPER']]))
         assert issubclass(self.B.upper().dtype.type, np.unicode_)
         assert_array_equal(self.B.upper(), [
                 [u' \u03a3 ', u''],