[Numpy-svn] r8151 - in trunk/numpy/core: . tests
numpy-svn at scipy.org
numpy-svn at scipy.org
Sat Feb 20 13:09:17 EST 2010
Author: ptvirtan
Date: 2010-02-20 12:09:17 -0600 (Sat, 20 Feb 2010)
New Revision: 8151
Modified:
trunk/numpy/core/defchararray.py
trunk/numpy/core/tests/test_defchararray.py
Log:
3K: ENH: make defchararray work on Py3, and make its tests Py3 compliant
There were mainly issues in mixing bytes/unicode on Py3.
Modified: trunk/numpy/core/defchararray.py
===================================================================
--- trunk/numpy/core/defchararray.py 2010-02-20 18:09:00 UTC (rev 8150)
+++ trunk/numpy/core/defchararray.py 2010-02-20 18:09:17 UTC (rev 8151)
@@ -21,6 +21,7 @@
from numeric import ndarray, compare_chararrays
from numeric import array as narray
from numpy.core.multiarray import _vec_string
+from numpy.compat import asbytes
import numpy
__all__ = ['chararray',
@@ -397,6 +398,8 @@
a_arr = numpy.asarray(a)
width_arr = numpy.asarray(width)
size = long(numpy.max(width_arr.flat))
+ if numpy.issubdtype(a_arr.dtype, numpy.string_):
+ fillchar = asbytes(fillchar)
return _vec_string(
a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
else:
@@ -917,6 +920,8 @@
a_arr = numpy.asarray(a)
width_arr = numpy.asarray(width)
size = long(numpy.max(width_arr.flat))
+ if numpy.issubdtype(a_arr.dtype, numpy.string_):
+ fillchar = asbytes(fillchar)
return _vec_string(
a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
else:
@@ -1183,6 +1188,8 @@
a_arr = numpy.asarray(a)
width_arr = numpy.asarray(width)
size = long(numpy.max(width_arr.flat))
+ if numpy.issubdtype(a_arr.dtype, numpy.string_):
+ fillchar = asbytes(fillchar)
return _vec_string(
a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
else:
@@ -1841,6 +1848,13 @@
# strings in the new array.
itemsize = long(itemsize)
+ if sys.version_info[0] >= 3 and isinstance(buffer, _unicode):
+ # On Py3, unicode objects do not have the buffer interface
+ filler = buffer
+ buffer = None
+ else:
+ filler = None
+
_globalvar = 1
if buffer is None:
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
@@ -1850,6 +1864,8 @@
buffer=buffer,
offset=offset, strides=strides,
order=order)
+ if filler is not None:
+ self[...] = filler
_globalvar = 0
return self
@@ -2594,7 +2610,7 @@
if itemsize is None:
itemsize = _len(obj)
- shape = _len(obj) / itemsize
+ shape = _len(obj) // itemsize
if unicode:
if sys.maxunicode == 0xffff:
Modified: trunk/numpy/core/tests/test_defchararray.py
===================================================================
--- trunk/numpy/core/tests/test_defchararray.py 2010-02-20 18:09:00 UTC (rev 8150)
+++ trunk/numpy/core/tests/test_defchararray.py 2010-02-20 18:09:17 UTC (rev 8151)
@@ -4,7 +4,9 @@
import sys
from numpy.core.multiarray import _vec_string
-kw_unicode_true = {'unicode': True}
+from numpy.compat import asbytes, asbytes_nested
+
+kw_unicode_true = {'unicode': True} # make 2to3 work properly
kw_unicode_false = {'unicode': False}
class TestBasic(TestCase):
@@ -13,7 +15,8 @@
['long ', '0123456789']], dtype='O')
B = np.char.array(A)
assert_equal(B.dtype.itemsize, 10)
- assert_array_equal(B, [['abc', '2'], ['long', '0123456789']])
+ assert_array_equal(B, asbytes_nested([['abc', '2'],
+ ['long', '0123456789']]))
def test_from_object_array_unicode(self):
A = np.array([['abc', u'Sigma \u03a3'],
@@ -21,11 +24,12 @@
self.failUnlessRaises(ValueError, np.char.array, (A,))
B = np.char.array(A, **kw_unicode_true)
assert_equal(B.dtype.itemsize, 10 * np.array('a', 'U').dtype.itemsize)
- assert_array_equal(B, [['abc', u'Sigma \u03a3'], ['long', '0123456789']])
+ assert_array_equal(B, [['abc', u'Sigma \u03a3'],
+ ['long', '0123456789']])
def test_from_string_array(self):
- A = np.array([['abc', 'foo'],
- ['long ', '0123456789']])
+ A = np.array(asbytes_nested([['abc', 'foo'],
+ ['long ', '0123456789']]))
assert_equal(A.dtype.type, np.string_)
B = np.char.array(A)
assert_array_equal(B, A)
@@ -62,7 +66,7 @@
assert issubclass((A + B).dtype.type, np.unicode_)
def test_from_string(self):
- A = np.char.array('abc')
+ A = np.char.array(asbytes('abc'))
assert_equal(len(A), 1)
assert_equal(len(A[0]), 3)
assert issubclass(A.dtype.type, np.string_)
@@ -132,7 +136,7 @@
def test_it(self):
assert_equal(self.A.shape, (4,))
- assert_equal(self.A.upper()[:2].tostring(), 'AB')
+ assert_equal(self.A.upper()[:2].tostring(), asbytes('AB'))
class TestComparisons(TestCase):
def setUp(self):
@@ -277,17 +281,18 @@
def setUp(self):
self.A = np.array([[' abc ', ''],
['12345', 'MixedCase'],
- ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray)
+ ['123 \t 345 \0 ', 'UPPER']],
+ dtype='S').view(np.chararray)
self.B = np.array([[u' \u03a3 ', u''],
[u'12345', u'MixedCase'],
[u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray)
def test_capitalize(self):
assert issubclass(self.A.capitalize().dtype.type, np.string_)
- assert_array_equal(self.A.capitalize(), [
+ assert_array_equal(self.A.capitalize(), asbytes_nested([
[' abc ', ''],
['12345', 'Mixedcase'],
- ['123 \t 345 \0 ', 'Upper']])
+ ['123 \t 345 \0 ', 'Upper']]))
assert issubclass(self.B.capitalize().dtype.type, np.unicode_)
assert_array_equal(self.B.capitalize(), [
[u' \u03c3 ', ''],
@@ -299,56 +304,71 @@
widths = np.array([[10, 20]])
C = self.A.center([10, 20])
assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
- C = self.A.center(20, '#')
- assert np.all(C.startswith('#'))
- assert np.all(C.endswith('#'))
- C = np.char.center('FOO', [[10, 20], [15, 8]])
+ C = self.A.center(20, asbytes('#'))
+ assert np.all(C.startswith(asbytes('#')))
+ assert np.all(C.endswith(asbytes('#')))
+ C = np.char.center(asbytes('FOO'), [[10, 20], [15, 8]])
assert issubclass(C.dtype.type, np.string_)
- assert_array_equal(C, [
+ assert_array_equal(C, asbytes_nested([
[' FOO ', ' FOO '],
- [' FOO ', ' FOO ']])
+ [' FOO ', ' FOO ']]))
def test_decode(self):
- A = np.char.array(['736563726574206d657373616765'])
- assert A.decode('hex_codec')[0] == 'secret message'
+ if sys.version_info[0] >= 3:
+ A = np.char.array([asbytes('\\u03a3')])
+ assert A.decode('unicode-escape')[0] == '\u03a3'
+ else:
+ A = np.char.array(['736563726574206d657373616765'])
+ assert A.decode('hex_codec')[0] == 'secret message'
def test_encode(self):
B = self.B.encode('unicode_escape')
- assert B[0][0] == r' \u03a3 '
+ assert B[0][0] == asbytes(r' \u03a3 ')
def test_expandtabs(self):
T = self.A.expandtabs()
- assert T[2][0] == '123 345'
+ assert T[2][0] == asbytes('123 345')
def test_join(self):
- A = np.char.join([',', '#'], self.A)
- assert issubclass(A.dtype.type, np.string_)
- assert_array_equal(np.char.join([',', '#'], self.A), [
- [' ,a,b,c, ', ''],
- ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
- ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
+ if sys.version_info[0] >= 3:
+ # NOTE: list(b'123') == [49, 50, 51]
+ # so that b','.join(b'123') results to an error on Py3
+ A0 = self.A.decode('ascii')
+ else:
+ A0 = self.A
+ A = np.char.join([',', '#'], A0)
+ if sys.version_info[0] >= 3:
+ assert issubclass(A.dtype.type, np.unicode_)
+ else:
+ assert issubclass(A.dtype.type, np.string_)
+ assert_array_equal(np.char.join([',', '#'], A0),
+ [
+ [' ,a,b,c, ', ''],
+ ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
+ ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
+
def test_ljust(self):
assert issubclass(self.A.ljust(10).dtype.type, np.string_)
widths = np.array([[10, 20]])
C = self.A.ljust([10, 20])
assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
- C = self.A.ljust(20, '#')
- assert_array_equal(C.startswith('#'), [
+ C = self.A.ljust(20, asbytes('#'))
+ assert_array_equal(C.startswith(asbytes('#')), [
[False, True], [False, False], [False, False]])
- assert np.all(C.endswith('#'))
- C = np.char.ljust('FOO', [[10, 20], [15, 8]])
+ assert np.all(C.endswith(asbytes('#')))
+ C = np.char.ljust(asbytes('FOO'), [[10, 20], [15, 8]])
assert issubclass(C.dtype.type, np.string_)
- assert_array_equal(C, [
+ assert_array_equal(C, asbytes_nested([
['FOO ', 'FOO '],
- ['FOO ', 'FOO ']])
+ ['FOO ', 'FOO ']]))
def test_lower(self):
assert issubclass(self.A.lower().dtype.type, np.string_)
- assert_array_equal(self.A.lower(), [
+ assert_array_equal(self.A.lower(), asbytes_nested([
[' abc ', ''],
['12345', 'mixedcase'],
- ['123 \t 345 \0 ', 'upper']])
+ ['123 \t 345 \0 ', 'upper']]))
assert issubclass(self.B.lower().dtype.type, np.unicode_)
assert_array_equal(self.B.lower(), [
[u' \u03c3 ', u''],
@@ -357,14 +377,15 @@
def test_lstrip(self):
assert issubclass(self.A.lstrip().dtype.type, np.string_)
- assert_array_equal(self.A.lstrip(), [
+ assert_array_equal(self.A.lstrip(), asbytes_nested([
['abc ', ''],
['12345', 'MixedCase'],
- ['123 \t 345 \0 ', 'UPPER']])
- assert_array_equal(self.A.lstrip(['1', 'M']), [
+ ['123 \t 345 \0 ', 'UPPER']]))
+ assert_array_equal(self.A.lstrip(asbytes_nested(['1', 'M'])),
+ asbytes_nested([
[' abc', ''],
['2345', 'ixedCase'],
- ['23 \t 345 \x00', 'UPPER']])
+ ['23 \t 345 \x00', 'UPPER']]))
assert issubclass(self.B.lstrip().dtype.type, np.unicode_)
assert_array_equal(self.B.lstrip(), [
[u'\u03a3 ', ''],
@@ -373,68 +394,74 @@
def test_partition(self):
if sys.version_info >= (2, 5):
- P = self.A.partition(['3', 'M'])
+ P = self.A.partition(asbytes_nested(['3', 'M']))
assert issubclass(P.dtype.type, np.string_)
- assert_array_equal(P, [
+ assert_array_equal(P, asbytes_nested([
[(' abc ', '', ''), ('', '', '')],
[('12', '3', '45'), ('', 'M', 'ixedCase')],
- [('12', '3', ' \t 345 \0 '), ('UPPER', '', '')]])
+ [('12', '3', ' \t 345 \0 '), ('UPPER', '', '')]]))
def test_replace(self):
- R = self.A.replace(['3', 'a'], ['##########', '@'])
+ R = self.A.replace(asbytes_nested(['3', 'a']),
+ asbytes_nested(['##########', '@']))
assert issubclass(R.dtype.type, np.string_)
- assert_array_equal(R, [
+ assert_array_equal(R, asbytes_nested([
[' abc ', ''],
['12##########45', 'MixedC at se'],
- ['12########## \t ##########45 \x00', 'UPPER']])
- R = self.A.replace('a', u'\u03a3')
- assert issubclass(R.dtype.type, np.unicode_)
- assert_array_equal(R, [
- [u' \u03a3bc ', ''],
- ['12345', u'MixedC\u03a3se'],
- ['123 \t 345 \x00', 'UPPER']])
+ ['12########## \t ##########45 \x00', 'UPPER']]))
+ if sys.version_info[0] < 3:
+ # NOTE: b'abc'.replace(b'a', 'b') is not allowed on Py3
+ R = self.A.replace(asbytes('a'), u'\u03a3')
+ assert issubclass(R.dtype.type, np.unicode_)
+ assert_array_equal(R, [
+ [u' \u03a3bc ', ''],
+ ['12345', u'MixedC\u03a3se'],
+ ['123 \t 345 \x00', 'UPPER']])
+
def test_rjust(self):
assert issubclass(self.A.rjust(10).dtype.type, np.string_)
widths = np.array([[10, 20]])
C = self.A.rjust([10, 20])
assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
- C = self.A.rjust(20, '#')
- assert np.all(C.startswith('#'))
- assert_array_equal(C.endswith('#'), [[False, True], [False, False], [False, False]])
- C = np.char.rjust('FOO', [[10, 20], [15, 8]])
+ C = self.A.rjust(20, asbytes('#'))
+ assert np.all(C.startswith(asbytes('#')))
+ assert_array_equal(C.endswith(asbytes('#')),
+ [[False, True], [False, False], [False, False]])
+ C = np.char.rjust(asbytes('FOO'), [[10, 20], [15, 8]])
assert issubclass(C.dtype.type, np.string_)
- assert_array_equal(C, [
+ assert_array_equal(C, asbytes_nested([
[' FOO', ' FOO'],
- [' FOO', ' FOO']])
+ [' FOO', ' FOO']]))
def test_rpartition(self):
if sys.version_info >= (2, 5):
- P = self.A.rpartition(['3', 'M'])
+ P = self.A.rpartition(asbytes_nested(['3', 'M']))
assert issubclass(P.dtype.type, np.string_)
- assert_array_equal(P, [
+ assert_array_equal(P, asbytes_nested([
[('', '', ' abc '), ('', '', '')],
[('12', '3', '45'), ('', 'M', 'ixedCase')],
- [('123 \t ', '3', '45 \0 '), ('', '', 'UPPER')]])
+ [('123 \t ', '3', '45 \0 '), ('', '', 'UPPER')]]))
def test_rsplit(self):
- A = self.A.rsplit('3')
+ A = self.A.rsplit(asbytes('3'))
assert issubclass(A.dtype.type, np.object_)
- assert_equal(A.tolist(), [
+ assert_equal(A.tolist(), asbytes_nested([
[[' abc '], ['']],
[['12', '45'], ['MixedCase']],
- [['12', ' \t ', '45 \x00 '], ['UPPER']]])
+ [['12', ' \t ', '45 \x00 '], ['UPPER']]]))
def test_rstrip(self):
assert issubclass(self.A.rstrip().dtype.type, np.string_)
- assert_array_equal(self.A.rstrip(), [
+ assert_array_equal(self.A.rstrip(), asbytes_nested([
[' abc', ''],
['12345', 'MixedCase'],
- ['123 \t 345', 'UPPER']])
- assert_array_equal(self.A.rstrip(['5', 'ER']), [
+ ['123 \t 345', 'UPPER']]))
+ assert_array_equal(self.A.rstrip(asbytes_nested(['5', 'ER'])),
+ asbytes_nested([
[' abc ', ''],
['1234', 'MixedCase'],
- ['123 \t 345 \x00', 'UPP']])
+ ['123 \t 345 \x00', 'UPP']]))
assert issubclass(self.B.rstrip().dtype.type, np.unicode_)
assert_array_equal(self.B.rstrip(), [
[u' \u03a3', ''],
@@ -443,14 +470,15 @@
def test_strip(self):
assert issubclass(self.A.strip().dtype.type, np.string_)
- assert_array_equal(self.A.strip(), [
+ assert_array_equal(self.A.strip(), asbytes_nested([
['abc', ''],
['12345', 'MixedCase'],
- ['123 \t 345', 'UPPER']])
- assert_array_equal(self.A.strip(['15', 'EReM']), [
+ ['123 \t 345', 'UPPER']]))
+ assert_array_equal(self.A.strip(asbytes_nested(['15', 'EReM'])),
+ asbytes_nested([
[' abc ', ''],
['234', 'ixedCas'],
- ['23 \t 345 \x00', 'UPP']])
+ ['23 \t 345 \x00', 'UPP']]))
assert issubclass(self.B.strip().dtype.type, np.unicode_)
assert_array_equal(self.B.strip(), [
[u'\u03a3', ''],
@@ -458,12 +486,12 @@
['123 \t 345', 'UPPER']])
def test_split(self):
- A = self.A.split('3')
+ A = self.A.split(asbytes('3'))
assert issubclass(A.dtype.type, np.object_)
- assert_equal(A.tolist(), [
+ assert_equal(A.tolist(), asbytes_nested([
[[' abc '], ['']],
[['12', '45'], ['MixedCase']],
- [['12', ' \t ', '45 \x00 '], ['UPPER']]])
+ [['12', ' \t ', '45 \x00 '], ['UPPER']]]))
def test_splitlines(self):
A = np.char.array(['abc\nfds\nwer']).splitlines()
@@ -473,10 +501,10 @@
def test_swapcase(self):
assert issubclass(self.A.swapcase().dtype.type, np.string_)
- assert_array_equal(self.A.swapcase(), [
+ assert_array_equal(self.A.swapcase(), asbytes_nested([
[' ABC ', ''],
['12345', 'mIXEDcASE'],
- ['123 \t 345 \0 ', 'upper']])
+ ['123 \t 345 \0 ', 'upper']]))
assert issubclass(self.B.swapcase().dtype.type, np.unicode_)
assert_array_equal(self.B.swapcase(), [
[u' \u03c3 ', u''],
@@ -485,10 +513,10 @@
def test_title(self):
assert issubclass(self.A.title().dtype.type, np.string_)
- assert_array_equal(self.A.title(), [
+ assert_array_equal(self.A.title(), asbytes_nested([
[' Abc ', ''],
['12345', 'Mixedcase'],
- ['123 \t 345 \0 ', 'Upper']])
+ ['123 \t 345 \0 ', 'Upper']]))
assert issubclass(self.B.title().dtype.type, np.unicode_)
assert_array_equal(self.B.title(), [
[u' \u03a3 ', u''],
@@ -497,10 +525,10 @@
def test_upper(self):
assert issubclass(self.A.upper().dtype.type, np.string_)
- assert_array_equal(self.A.upper(), [
+ assert_array_equal(self.A.upper(), asbytes_nested([
[' ABC ', ''],
['12345', 'MIXEDCASE'],
- ['123 \t 345 \0 ', 'UPPER']])
+ ['123 \t 345 \0 ', 'UPPER']]))
assert issubclass(self.B.upper().dtype.type, np.unicode_)
assert_array_equal(self.B.upper(), [
[u' \u03a3 ', u''],
More information about the Numpy-svn
mailing list