[pypy-commit] pypy unicode-utf8: improve the slice tests and fix it
fijal
pypy.commits at gmail.com
Thu Oct 26 14:11:41 EDT 2017
Author: fijal
Branch: unicode-utf8
Changeset: r92855:84d1ebd9002d
Date: 2017-10-26 20:11 +0200
http://bitbucket.org/pypy/pypy/changeset/84d1ebd9002d/
Log: improve the slice tests and fix it
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -907,16 +907,31 @@
def test_getslice(self):
assert u'123456'.__getslice__(1, 5) == u'2345'
- s = u"abc"
- assert s[:] == "abc"
- assert s[1:] == "bc"
- assert s[:2] == "ab"
- assert s[1:2] == "b"
- assert s[-2:] == "bc"
- assert s[:-1] == "ab"
- assert s[-2:2] == "b"
- assert s[1:-1] == "b"
- assert s[-2:-1] == "b"
+ s = u"\u0105b\u0107"
+ assert s[:] == u"\u0105b\u0107"
+ assert s[1:] == u"b\u0107"
+ assert s[:2] == u"\u0105b"
+ assert s[1:2] == u"b"
+ assert s[-2:] == u"b\u0107"
+ assert s[:-1] == u"\u0105b"
+ assert s[-2:2] == u"b"
+ assert s[1:-1] == u"b"
+ assert s[-2:-1] == u"b"
+
+ def test_getitem_slice(self):
+ assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
+ s = u"\u0105b\u0107"
+ assert s[slice(3)] == u"\u0105b\u0107"
+ assert s[slice(1, 3)] == u"b\u0107"
+ assert s[slice(2)] == u"\u0105b"
+ assert s[slice(1,2)] == u"b"
+ assert s[slice(-2,3)] == u"b\u0107"
+ assert s[slice(-1)] == u"\u0105b"
+ assert s[slice(-2,2)] == u"b"
+ assert s[slice(1,-1)] == u"b"
+ assert s[slice(-2,-1)] == u"b"
+ assert u"abcde"[::2] == u"ace"
+ assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"
def test_no_len_on_str_iter(self):
iterable = u"hello"
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -21,7 +21,7 @@
from pypy.objspace.std import newformat
from pypy.objspace.std.basestringtype import basestring_typedef
from pypy.objspace.std.formatting import mod_format
-from pypy.objspace.std.sliceobject import (
+from pypy.objspace.std.sliceobject import (W_SliceObject,
unwrap_start_stop, normalize_simple_slice)
from pypy.objspace.std.stringmethods import StringMethods
from pypy.objspace.std.util import IDTAG_SPECIAL, IDTAG_SHIFT
@@ -724,8 +724,36 @@
return space.newlist_utf8(res)
+ def descr_getitem(self, space, w_index):
+ if isinstance(w_index, W_SliceObject):
+ length = self._len()
+ start, stop, step, sl = w_index.indices4(space, length)
+ if sl == 0:
+ return self._empty()
+ elif step == 1:
+ assert start >= 0 and stop >= 0
+ return self._unicode_sliced(space, start, stop)
+ else:
+ return self._getitem_slice_slowpath(space, start, step, sl)
+
+ index = space.getindex_w(w_index, space.w_IndexError, "string index")
+ return self._getitem_result(space, index)
+
+ def _getitem_slice_slowpath(self, space, start, step, sl):
+ # XXX same comment as in _unicode_sliced
+ builder = StringBuilder(step * sl)
+ byte_pos = self._index_to_byte(start)
+ i = 0
+ while True:
+ next_pos = rutf8.next_codepoint_pos(self._utf8, byte_pos)
+ builder.append(self._utf8[byte_pos:next_pos])
+ if i == sl - 1:
+ break
+ i += 1
+ byte_pos = self._index_to_byte(start + i * step)
+ return W_UnicodeObject(builder.build(), sl)
+
def descr_getslice(self, space, w_start, w_stop):
- selfvalue = self._utf8
start, stop = normalize_simple_slice(
space, self._len(), w_start, w_stop)
if start == stop:
More information about the pypy-commit mailing list