[pypy-commit] pypy default: merge branch 'unicode-dtype'
rlamy
noreply at buildbot.pypy.org
Mon Jul 6 19:46:43 CEST 2015
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch:
Changeset: r78474:d62f1b272ea9
Date: 2015-07-06 18:44 +0100
http://bitbucket.org/pypy/pypy/changeset/d62f1b272ea9/
Log: merge branch 'unicode-dtype'
diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py
--- a/pypy/module/micronumpy/boxes.py
+++ b/pypy/module/micronumpy/boxes.py
@@ -196,7 +196,12 @@
"'%T' object is not iterable", self)
def descr_str(self, space):
- return space.wrap(self.get_dtype(space).itemtype.str_format(self, add_quotes=False))
+ tp = self.get_dtype(space).itemtype
+ return space.wrap(tp.str_format(self, add_quotes=False))
+
+ def descr_repr(self, space):
+ tp = self.get_dtype(space).itemtype
+ return space.wrap(tp.str_format(self, add_quotes=True))
def descr_format(self, space, w_spec):
return space.format(self.item(space), w_spec)
@@ -618,16 +623,25 @@
return W_StringBox(arr, 0, arr.dtype)
class W_UnicodeBox(W_CharacterBox):
+ def __init__(self, value):
+ self._value = value
+
+ def convert_to(self, space, dtype):
+ if dtype.is_unicode():
+ return self
+ elif dtype.is_object():
+ return W_ObjectBox(space.wrap(self._value))
+ else:
+ raise oefmt(space.w_NotImplementedError,
+ "Conversion from unicode not implemented yet")
+
+ def get_dtype(self, space):
+ from pypy.module.micronumpy.descriptor import new_unicode_dtype
+ return new_unicode_dtype(space, len(self._value))
+
def descr__new__unicode_box(space, w_subtype, w_arg):
- raise oefmt(space.w_NotImplementedError, "Unicode is not supported yet")
- from pypy.module.micronumpy.descriptor import new_unicode_dtype
- arg = space.unicode_w(space.unicode_from_object(w_arg))
- # XXX size computations, we need tests anyway
- arr = VoidBoxStorage(len(arg), new_unicode_dtype(space, len(arg)))
- # XXX not this way, we need store
- #for i in range(len(arg)):
- # arr.storage[i] = arg[i]
- return W_UnicodeBox(arr, 0, arr.dtype)
+ value = space.unicode_w(space.unicode_from_object(w_arg))
+ return W_UnicodeBox(value)
class W_ObjectBox(W_GenericBox):
descr__new__, _get_dtype, descr_reduce = new_dtype_getter(NPY.OBJECT)
@@ -649,7 +663,7 @@
__getitem__ = interp2app(W_GenericBox.descr_getitem),
__iter__ = interp2app(W_GenericBox.descr_iter),
__str__ = interp2app(W_GenericBox.descr_str),
- __repr__ = interp2app(W_GenericBox.descr_str),
+ __repr__ = interp2app(W_GenericBox.descr_repr),
__format__ = interp2app(W_GenericBox.descr_format),
__int__ = interp2app(W_GenericBox.descr_int),
__long__ = interp2app(W_GenericBox.descr_long),
diff --git a/pypy/module/micronumpy/casting.py b/pypy/module/micronumpy/casting.py
--- a/pypy/module/micronumpy/casting.py
+++ b/pypy/module/micronumpy/casting.py
@@ -325,6 +325,8 @@
return complex_dtype
elif space.isinstance_w(w_obj, space.w_str):
return variable_dtype(space, 'S%d' % space.len_w(w_obj))
+ elif space.isinstance_w(w_obj, space.w_unicode):
+ return new_unicode_dtype(space, space.len_w(w_obj))
return object_dtype
@signature(ann.instance(W_Dtype), ann.instance(W_Dtype), returns=ann.bool())
diff --git a/pypy/module/micronumpy/test/test_dtypes.py b/pypy/module/micronumpy/test/test_dtypes.py
--- a/pypy/module/micronumpy/test/test_dtypes.py
+++ b/pypy/module/micronumpy/test/test_dtypes.py
@@ -1052,20 +1052,6 @@
assert d.name == "unicode256"
assert d.num == 19
- def test_string_boxes(self):
- from numpy import str_
- assert isinstance(str_(3), str_)
-
- def test_unicode_boxes(self):
- from numpy import unicode_
- import sys
- if '__pypy__' in sys.builtin_module_names:
- exc = raises(NotImplementedError, unicode_, 3)
- assert exc.value.message.find('not supported yet') >= 0
- else:
- u = unicode_(3)
- assert isinstance(u, unicode)
-
def test_character_dtype(self):
import numpy as np
from numpy import array, character
@@ -1133,7 +1119,7 @@
def test_array_from_record(self):
import numpy as np
- a = np.array(('???', -999, -12345678.9),
+ a = np.array(('???', -999, -12345678.9),
dtype=[('c', '|S3'), ('a', '<i8'), ('b', '<f8')])
# Change the order of the keys
b = np.array(a, dtype=[('a', '<i8'), ('b', '<f8'), ('c', '|S3')])
@@ -1141,7 +1127,7 @@
assert b.dtype.fields['a'][1] == 0
assert b['a'] == -999
a = np.array(('N/A', 1e+20, 1e+20, 999999),
- dtype=[('name', '|S4'), ('x', '<f8'),
+ dtype=[('name', '|S4'), ('x', '<f8'),
('y', '<f8'), ('block', '<i8', (2, 3))])
assert (a['block'] == 999999).all()
diff --git a/pypy/module/micronumpy/test/test_ndarray.py b/pypy/module/micronumpy/test/test_ndarray.py
--- a/pypy/module/micronumpy/test/test_ndarray.py
+++ b/pypy/module/micronumpy/test/test_ndarray.py
@@ -1,3 +1,4 @@
+# -*- encoding: utf-8 -*-
import py
import sys
@@ -322,6 +323,14 @@
assert b.flags['C']
assert (b == a).all()
+ def test_unicode(self):
+ import numpy as np
+ a = np.array([3, u'Aÿ', ''], dtype='U3')
+ assert a.shape == (3,)
+ assert a.dtype == np.dtype('U3')
+ assert a[0] == u'3'
+ assert a[1] == u'Aÿ'
+
def test_dtype_attribute(self):
import numpy as np
a = np.array(40000, dtype='uint16')
@@ -380,6 +389,9 @@
assert zeros((), dtype='S') == ''
assert zeros((), dtype='S').shape == ()
assert zeros((), dtype='S').dtype == '|S1'
+ assert zeros(5, dtype='U')[4] == u''
+ assert zeros(5, dtype='U').shape == (5,)
+ assert zeros(5, dtype='U').dtype == '<U1'
def test_check_shape(self):
import numpy as np
@@ -2423,6 +2435,12 @@
a.fill(12)
assert (a == '1').all()
+ def test_unicode_filling(self):
+ import numpy as np
+ a = np.empty((10,10), dtype='U1')
+ a.fill(12)
+ assert (a == u'1').all()
+
def test_boolean_indexing(self):
import numpy as np
a = np.zeros((1, 3))
diff --git a/pypy/module/micronumpy/test/test_object_arrays.py b/pypy/module/micronumpy/test/test_object_arrays.py
--- a/pypy/module/micronumpy/test/test_object_arrays.py
+++ b/pypy/module/micronumpy/test/test_object_arrays.py
@@ -171,4 +171,8 @@
assert 'a' * 100 in str(a)
b = a.astype('S')
assert 'a' * 100 in str(b)
-
+ a = np.array([123], dtype='U')
+ assert a[0] == u'123'
+ b = a.astype('O')
+ assert b[0] == u'123'
+ assert type(b[0]) is unicode
diff --git a/pypy/module/micronumpy/test/test_scalar.py b/pypy/module/micronumpy/test/test_scalar.py
--- a/pypy/module/micronumpy/test/test_scalar.py
+++ b/pypy/module/micronumpy/test/test_scalar.py
@@ -1,3 +1,4 @@
+# -*- encoding:utf-8 -*-
from pypy.module.micronumpy.test.test_base import BaseNumpyAppTest
class AppTestScalar(BaseNumpyAppTest):
@@ -457,3 +458,25 @@
for t in complex64, complex128:
_do_test(t, 17j, -17j)
+
+ def test_string_boxes(self):
+ from numpy import str_
+ assert isinstance(str_(3), str_)
+ assert str_(3) == '3'
+ assert str(str_(3)) == '3'
+ assert repr(str_(3)) == "'3'"
+
+ def test_unicode_boxes(self):
+ from numpy import unicode_
+ u = unicode_(3)
+ assert isinstance(u, unicode)
+ assert u == u'3'
+
+ def test_unicode_repr(self):
+ from numpy import unicode_
+ u = unicode_(3)
+ assert str(u) == '3'
+ assert repr(u) == "u'3'"
+ u = unicode_(u'Aÿ')
+ # raises(UnicodeEncodeError, "str(u)") # XXX
+ assert repr(u) == repr(u'Aÿ')
diff --git a/pypy/module/micronumpy/test/test_selection.py b/pypy/module/micronumpy/test/test_selection.py
--- a/pypy/module/micronumpy/test/test_selection.py
+++ b/pypy/module/micronumpy/test/test_selection.py
@@ -210,22 +210,28 @@
assert (c == a).all(), msg
def test_sort_unicode(self):
+ import sys
from numpy import array
# test unicode sorts.
s = 'aaaaaaaa'
- try:
- a = array([s + chr(i) for i in range(101)], dtype=unicode)
- b = a[::-1].copy()
- except:
- skip('unicode type not supported yet')
- for kind in ['q', 'm', 'h'] :
+ a = array([s + chr(i) for i in range(101)], dtype=unicode)
+ b = a[::-1].copy()
+ for kind in ['q', 'm', 'h']:
msg = "unicode sort, kind=%s" % kind
- c = a.copy();
- c.sort(kind=kind)
- assert (c == a).all(), msg
- c = b.copy();
- c.sort(kind=kind)
- assert (c == a).all(), msg
+ c = a.copy()
+ if '__pypy__' in sys.builtin_module_names:
+ exc = raises(NotImplementedError, "c.sort(kind=kind)")
+ assert 'non-numeric types' in exc.value.message
+ else:
+ c.sort(kind=kind)
+ assert (c == a).all(), msg
+ c = b.copy()
+ if '__pypy__' in sys.builtin_module_names:
+ exc = raises(NotImplementedError, "c.sort(kind=kind)")
+ assert 'non-numeric types' in exc.value.message
+ else:
+ c.sort(kind=kind)
+ assert (c == a).all(), msg
def test_sort_objects(self):
# test object array sorts.
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -11,7 +11,7 @@
most_neg_value_of, LONG_BIT
from rpython.rlib.rawstorage import (alloc_raw_storage,
raw_storage_getitem_unaligned, raw_storage_setitem_unaligned)
-from rpython.rlib.rstring import StringBuilder
+from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
from rpython.rlib.rstruct.ieee import (float_pack, float_unpack, unpack_float,
pack_float80, unpack_float80)
from rpython.rlib.rstruct.nativefmttable import native_is_bigendian
@@ -50,6 +50,7 @@
pass
return _raw_storage_getitem_unaligned(T, storage, offset)
'''
+
def simple_unary_op(func):
specialize.argtype(1)(func)
@functools.wraps(func)
@@ -2177,7 +2178,7 @@
self._store(storage, i, offset, box, width)
class UnicodeType(FlexibleType):
- T = lltype.Char
+ T = lltype.UniChar
num = NPY.UNICODE
kind = NPY.UNICODELTR
char = NPY.UNICODELTR
@@ -2189,58 +2190,121 @@
def coerce(self, space, dtype, w_item):
if isinstance(w_item, boxes.W_UnicodeBox):
return w_item
- raise OperationError(space.w_NotImplementedError, space.wrap(
- "coerce (probably from set_item) not implemented for unicode type"))
+ value = space.unicode_w(space.unicode_from_object(w_item))
+ return boxes.W_UnicodeBox(value)
def store(self, arr, i, offset, box, native):
assert isinstance(box, boxes.W_UnicodeBox)
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ value = box._value
+ with arr as storage:
+ self._store(storage, i, offset, box, arr.dtype.elsize)
+
+ @jit.unroll_safe
+ def _store(self, storage, i, offset, box, width):
+ size = min(width // 4, len(box._value))
+ for k in range(size):
+ index = i + offset + 4*k
+ data = rffi.cast(Int32.T, ord(box._value[k]))
+ raw_storage_setitem_unaligned(storage, index, data)
+ for k in range(size, width // 4):
+ index = i + offset + 4*k
+ data = rffi.cast(Int32.T, 0)
+ raw_storage_setitem_unaligned(storage, index, data)
def read(self, arr, i, offset, dtype):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ if dtype is None:
+ dtype = arr.dtype
+ size = dtype.elsize // 4
+ builder = UnicodeBuilder(size)
+ with arr as storage:
+ for k in range(size):
+ index = i + offset + 4*k
+ codepoint = raw_storage_getitem_unaligned(
+ Int32.T, arr.storage, index)
+ char = unichr(codepoint)
+ if char == u'\0':
+ break
+ builder.append(char)
+ return boxes.W_UnicodeBox(builder.build())
def str_format(self, item, add_quotes=True):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(item, boxes.W_UnicodeBox)
+ if add_quotes:
+ w_unicode = self.to_builtin_type(self.space, item)
+ return self.space.str_w(self.space.repr(w_unicode))
+ else:
+ # Same as W_UnicodeBox.descr_repr() but without quotes and prefix
+ from rpython.rlib.runicode import unicode_encode_unicode_escape
+ return unicode_encode_unicode_escape(item._value,
+ len(item._value), 'strict')
def to_builtin_type(self, space, box):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(box, boxes.W_UnicodeBox)
+ return space.wrap(box._value)
def eq(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ return v1._value == v2._value
def ne(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ return v1._value != v2._value
def lt(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ return v1._value < v2._value
def le(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ return v1._value <= v2._value
def gt(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ return v1._value > v2._value
def ge(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ return v1._value >= v2._value
def logical_and(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ if bool(v1) and bool(v2):
+ return Bool._True
+ return Bool._False
def logical_or(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ if bool(v1) or bool(v2):
+ return Bool._True
+ return Bool._False
def logical_not(self, v):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
-
- @str_binary_op
+ assert isinstance(v, boxes.W_UnicodeBox)
+ return not bool(v)
+
def logical_xor(self, v1, v2):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v1, boxes.W_UnicodeBox)
+ assert isinstance(v2, boxes.W_UnicodeBox)
+ a = bool(v1)
+ b = bool(v2)
+ return (not b and a) or (not a and b)
def bool(self, v):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(v, boxes.W_UnicodeBox)
+ return bool(v._value)
def fill(self, storage, width, native, box, start, stop, offset, gcstruct):
- raise oefmt(self.space.w_NotImplementedError, "unicode type not completed")
+ assert isinstance(box, boxes.W_UnicodeBox)
+ for i in xrange(start, stop, width):
+ self._store(storage, i, offset, box, width)
class VoidType(FlexibleType):
More information about the pypy-commit mailing list