[pypy-commit] pypy unicode-utf8: fix micronumpy
fijal
pypy.commits at gmail.com
Mon Dec 11 01:38:36 EST 2017
Author: fijal
Branch: unicode-utf8
Changeset: r93358:fadafada40af
Date: 2017-12-11 08:37 +0200
http://bitbucket.org/pypy/pypy/changeset/fadafada40af/
Log: fix micronumpy
diff --git a/pypy/module/micronumpy/boxes.py b/pypy/module/micronumpy/boxes.py
--- a/pypy/module/micronumpy/boxes.py
+++ b/pypy/module/micronumpy/boxes.py
@@ -11,6 +11,7 @@
from rpython.rlib.rstring import StringBuilder
from rpython.rlib.objectmodel import specialize
from rpython.rlib import jit
+from rpython.rlib.rutf8 import get_utf8_length
from rpython.rtyper.lltypesystem import lltype, rffi
from rpython.tool.sourcetools import func_with_new_name
from pypy.module.micronumpy import constants as NPY
@@ -636,7 +637,8 @@
if dtype.is_unicode():
return self
elif dtype.is_object():
- return W_ObjectBox(space.newunicode(self._value))
+ return W_ObjectBox(space.newutf8(self._value,
+ get_utf8_length(self._value)))
else:
raise oefmt(space.w_NotImplementedError,
"Conversion from unicode not implemented yet")
@@ -646,7 +648,7 @@
return new_unicode_dtype(space, len(self._value))
def descr__new__unicode_box(space, w_subtype, w_arg):
- value = space.unicode_w(space.unicode_from_object(w_arg))
+ value = space.utf8_w(space.unicode_from_object(w_arg))
return W_UnicodeBox(value)
class W_ObjectBox(W_GenericBox):
diff --git a/pypy/module/micronumpy/compile.py b/pypy/module/micronumpy/compile.py
--- a/pypy/module/micronumpy/compile.py
+++ b/pypy/module/micronumpy/compile.py
@@ -197,7 +197,7 @@
return StringObject(obj)
newbytes = newtext
- def newunicode(self, obj):
+ def newutf8(self, obj, l):
raise NotImplementedError
def newlist(self, items):
@@ -305,10 +305,10 @@
raise NotImplementedError
text_w = bytes_w
- def unicode_w(self, w_obj):
+ def utf8_w(self, w_obj):
# XXX
if isinstance(w_obj, StringObject):
- return unicode(w_obj.v)
+ return w_obj.v
raise NotImplementedError
def int(self, w_obj):
diff --git a/pypy/module/micronumpy/types.py b/pypy/module/micronumpy/types.py
--- a/pypy/module/micronumpy/types.py
+++ b/pypy/module/micronumpy/types.py
@@ -1,6 +1,7 @@
import functools
import math
from rpython.rlib.unroll import unrolling_iterable
+from rpython.rlib.rutf8 import Utf8StringIterator, get_utf8_length, Utf8StringBuilder
from pypy.interpreter.error import OperationError, oefmt
from pypy.objspace.std.floatobject import float2string
from pypy.objspace.std.complexobject import str_format
@@ -2271,23 +2272,29 @@
if isinstance(w_item, boxes.W_UnicodeBox):
return w_item
if isinstance(w_item, boxes.W_ObjectBox):
- value = space.unicode_w(space.unicode_from_object(w_item.w_obj))
+ value = space.utf8_w(space.unicode_from_object(w_item.w_obj))
else:
- value = space.unicode_w(space.unicode_from_object(w_item))
+ value = space.utf8_w(space.unicode_from_object(w_item))
return boxes.W_UnicodeBox(value)
+ def convert_utf8_to_unichar_list(self, utf8):
+ l = []
+ for ch in Utf8StringIterator(utf8):
+ l.append(unichr(ch))
+ return l
+
def store(self, arr, i, offset, box, native):
assert isinstance(box, boxes.W_UnicodeBox)
- value = box._value
with arr as storage:
self._store(storage, i, offset, box, arr.dtype.elsize)
@jit.unroll_safe
def _store(self, storage, i, offset, box, width):
- size = min(width // 4, len(box._value))
+ v = self.convert_utf8_to_unichar_list(box._value)
+ size = min(width // 4, len(v))
for k in range(size):
index = i + offset + 4*k
- data = rffi.cast(Int32.T, ord(box._value[k]))
+ data = rffi.cast(Int32.T, ord(v[k]))
raw_storage_setitem_unaligned(storage, index, data)
# zero out the remaining memory
for index in range(size * 4 + i + offset, width):
@@ -2298,16 +2305,16 @@
if dtype is None:
dtype = arr.dtype
size = dtype.elsize // 4
- builder = UnicodeBuilder(size)
+ builder = Utf8StringBuilder(size)
with arr as storage:
for k in range(size):
index = i + offset + 4*k
- codepoint = raw_storage_getitem_unaligned(
- Int32.T, arr.storage, index)
- char = unichr(codepoint)
- if char == u'\0':
+ codepoint = rffi.cast(lltype.Signed,
+ raw_storage_getitem_unaligned(
+ Int32.T, arr.storage, index))
+ if codepoint == 0:
break
- builder.append(char)
+ builder.append_code(codepoint)
return boxes.W_UnicodeBox(builder.build())
def str_format(self, item, add_quotes=True):
@@ -2323,7 +2330,7 @@
def to_builtin_type(self, space, box):
assert isinstance(box, boxes.W_UnicodeBox)
- return space.newunicode(box._value)
+ return space.newutf8(box._value, get_utf8_length(box._value))
def eq(self, v1, v2):
assert isinstance(v1, boxes.W_UnicodeBox)
More information about the pypy-commit mailing list