[pypy-commit] pypy default: Backport test additions and cleanups from unicode-utf8
rlamy
pypy.commits at gmail.com
Tue Dec 12 00:02:28 EST 2017
Author: Ronan Lamy <ronan.lamy at gmail.com>
Branch:
Changeset: r93381:458ccc9243cd
Date: 2017-12-12 04:57 +0000
http://bitbucket.org/pypy/pypy/changeset/458ccc9243cd/
Log: Backport test additions and cleanups from unicode-utf8
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -1246,3 +1246,7 @@
exc = py.test.raises(SyntaxError, self.get_ast, input).value
assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
" bytes in position 0-1: truncated \\xXX escape")
+ input = "u'\\x1'"
+ exc = py.test.raises(SyntaxError, self.get_ast, input).value
+ assert exc.msg == ("(unicode error) 'unicodeescape' codec can't decode"
+ " bytes in position 0-2: truncated \\xXX escape")
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -210,7 +210,8 @@
def xmlcharrefreplace_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object')))
+ w_obj = space.getattr(w_exc, space.newtext('object'))
+ obj = space.realunicode_w(w_obj)
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
@@ -236,7 +237,8 @@
def backslashreplace_errors(space, w_exc):
check_exception(space, w_exc)
if space.isinstance_w(w_exc, space.w_UnicodeEncodeError):
- obj = space.realunicode_w(space.getattr(w_exc, space.newtext('object')))
+ w_obj = space.getattr(w_exc, space.newtext('object'))
+ obj = space.realunicode_w(w_obj)
start = space.int_w(space.getattr(w_exc, space.newtext('start')))
w_end = space.getattr(w_exc, space.newtext('end'))
end = space.int_w(w_end)
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -115,10 +115,10 @@
raises(TypeError, charmap_decode, '\xff', "strict", {0xff: 0x110000})
assert (charmap_decode("\x00\x01\x02", "strict",
{0: 0x10FFFF, 1: ord('b'), 2: ord('c')}) ==
- u"\U0010FFFFbc", 3)
+ (u"\U0010FFFFbc", 3))
assert (charmap_decode("\x00\x01\x02", "strict",
{0: u'\U0010FFFF', 1: u'b', 2: u'c'}) ==
- u"\U0010FFFFbc", 3)
+ (u"\U0010FFFFbc", 3))
def test_escape_decode_errors(self):
from _codecs import escape_decode as decode
@@ -537,8 +537,12 @@
assert '\xff'.decode('utf-7', 'ignore') == ''
assert '\x00'.decode('unicode-internal', 'ignore') == ''
- def test_backslahreplace(self):
- assert u'a\xac\u1234\u20ac\u8000'.encode('ascii', 'backslashreplace') == 'a\\xac\u1234\u20ac\u8000'
+ def test_backslashreplace(self):
+ sin = u"a\xac\u1234\u20ac\u8000\U0010ffff"
+ expected = "a\\xac\\u1234\\u20ac\\u8000\\U0010ffff"
+ assert sin.encode('ascii', 'backslashreplace') == expected
+ expected = "a\xac\\u1234\xa4\\u8000\\U0010ffff"
+ assert sin.encode("iso-8859-15", "backslashreplace") == expected
def test_badhandler(self):
import codecs
@@ -592,11 +596,11 @@
def handler_unicodeinternal(exc):
if not isinstance(exc, UnicodeDecodeError):
raise TypeError("don't know how to handle %r" % exc)
- return (u"\x01", 1)
+ return (u"\x01", 4)
codecs.register_error("test.hui", handler_unicodeinternal)
res = "\x00\x00\x00\x00\x00".decode("unicode-internal", "test.hui")
if sys.maxunicode > 65535:
- assert res == u"\u0000\u0001\u0000" # UCS4 build
+ assert res == u"\u0000\u0001" # UCS4 build
else:
assert res == u"\x00\x00\x01\x00\x00" # UCS2 build
@@ -750,3 +754,31 @@
assert _codecs.unicode_escape_decode(b) == (u'', 0)
assert _codecs.raw_unicode_escape_decode(b) == (u'', 0)
assert _codecs.unicode_internal_decode(b) == (u'', 0)
+
+ def test_xmlcharrefreplace(self):
+ r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('latin1', 'xmlcharrefreplace')
+ assert r == '&#4660;\x80&#9029;y\xab'
+ r = u'\u1234\u0080\u2345\u0079\u00AB'.encode('ascii', 'xmlcharrefreplace')
+ assert r == '&#4660;&#128;&#9029;y&#171;'
+
+ def test_errorhandler_collection(self):
+ import _codecs
+ errors = []
+ def record_error(exc):
+ if not isinstance(exc, UnicodeEncodeError):
+ raise TypeError("don't know how to handle %r" % exc)
+ errors.append(exc.object[exc.start:exc.end])
+ return (u'', exc.end)
+ _codecs.register_error("test.record", record_error)
+
+ sin = u"\xac\u1234\u1234\u20ac\u8000"
+ assert sin.encode("ascii", "test.record") == ""
+ assert errors == [sin]
+
+ errors = []
+ assert sin.encode("latin-1", "test.record") == "\xac"
+ assert errors == [u'\u1234\u1234\u20ac\u8000']
+
+ errors = []
+ assert sin.encode("iso-8859-15", "test.record") == "\xac\xa4"
+ assert errors == [u'\u1234\u1234', u'\u8000']
diff --git a/pypy/module/_io/interp_textio.py b/pypy/module/_io/interp_textio.py
--- a/pypy/module/_io/interp_textio.py
+++ b/pypy/module/_io/interp_textio.py
@@ -414,6 +414,7 @@
if not space.isinstance_w(w_decoded, space.w_unicode):
msg = "decoder should return a string result, not '%T'"
raise oefmt(space.w_TypeError, msg, w_decoded)
+ return w_decoded
class W_TextIOWrapper(W_TextIOBase):
@@ -940,12 +941,13 @@
w_decoded = space.call_method(self.w_decoder, "decode",
w_chunk, space.newbool(bool(cookie.need_eof)))
- self.decoded.set(space, w_decoded)
+ w_decoded = check_decoded(space, w_decoded)
# Skip chars_to_skip of the decoded characters
- if len(self.decoded.text) < cookie.chars_to_skip:
+ if space.len_w(w_decoded) < cookie.chars_to_skip:
raise oefmt(space.w_IOError,
"can't restore logical file position")
+ self.decoded.set(space, w_decoded)
self.decoded.pos = cookie.chars_to_skip
else:
self.snapshot = PositionSnapshot(cookie.dec_flags, "")
@@ -958,10 +960,8 @@
def tell_w(self, space):
self._check_closed(space)
-
if not self.seekable:
raise oefmt(space.w_IOError, "underlying stream is not seekable")
-
if not self.telling:
raise oefmt(space.w_IOError,
"telling position disabled by next() call")
@@ -1031,14 +1031,14 @@
# We didn't get enough decoded data; signal EOF to get more.
w_decoded = space.call_method(self.w_decoder, "decode",
space.newbytes(""),
- space.newint(1)) # final=1
+ space.newint(1)) # final=1
check_decoded(space, w_decoded)
- chars_decoded += len(space.unicode_w(w_decoded))
+ chars_decoded += space.len_w(w_decoded)
cookie.need_eof = 1
if chars_decoded < chars_to_skip:
raise oefmt(space.w_IOError,
- "can't reconstruct logical file position")
+ "can't reconstruct logical file position")
finally:
space.call_method(self.w_decoder, "setstate", w_saved_state)
diff --git a/pypy/module/_io/test/test_interp_textio.py b/pypy/module/_io/test/test_interp_textio.py
--- a/pypy/module/_io/test/test_interp_textio.py
+++ b/pypy/module/_io/test/test_interp_textio.py
@@ -40,7 +40,8 @@
w_newline=space.newtext(mode))
lines = []
for limit in limits:
- line = space.unicode_w(w_textio.readline_w(space, space.newint(limit)))
+ w_line = w_textio.readline_w(space, space.newint(limit))
+ line = space.unicode_w(w_line)
if limit >= 0:
assert len(line) <= limit
if line:
diff --git a/pypy/module/_pypyjson/interp_encoder.py b/pypy/module/_pypyjson/interp_encoder.py
--- a/pypy/module/_pypyjson/interp_encoder.py
+++ b/pypy/module/_pypyjson/interp_encoder.py
@@ -49,24 +49,24 @@
first = 0
for i in range(first, len(u)):
- c = u[i]
- if c <= u'~':
- if c == u'"' or c == u'\\':
+ c = ord(u[i])
+ if c <= ord('~'):
+ if c == ord('"') or c == ord('\\'):
sb.append('\\')
- elif c < u' ':
- sb.append(ESCAPE_BEFORE_SPACE[ord(c)])
+ elif c < ord(' '):
+ sb.append(ESCAPE_BEFORE_SPACE[c])
continue
- sb.append(chr(ord(c)))
+ sb.append(chr(c))
else:
- if c <= u'\uffff':
+ if c <= ord(u'\uffff'):
sb.append('\\u')
- sb.append(HEX[ord(c) >> 12])
- sb.append(HEX[(ord(c) >> 8) & 0x0f])
- sb.append(HEX[(ord(c) >> 4) & 0x0f])
- sb.append(HEX[ord(c) & 0x0f])
+ sb.append(HEX[c >> 12])
+ sb.append(HEX[(c >> 8) & 0x0f])
+ sb.append(HEX[(c >> 4) & 0x0f])
+ sb.append(HEX[c & 0x0f])
else:
# surrogate pair
- n = ord(c) - 0x10000
+ n = c - 0x10000
s1 = 0xd800 | ((n >> 10) & 0x3ff)
sb.append('\\ud')
sb.append(HEX[(s1 >> 8) & 0x0f])
diff --git a/pypy/module/_rawffi/alt/type_converter.py b/pypy/module/_rawffi/alt/type_converter.py
--- a/pypy/module/_rawffi/alt/type_converter.py
+++ b/pypy/module/_rawffi/alt/type_converter.py
@@ -128,7 +128,7 @@
intval: lltype.Signed
"""
self.error(w_ffitype, w_obj)
-
+
def handle_unichar(self, w_ffitype, w_obj, intval):
"""
intval: lltype.Signed
@@ -174,7 +174,7 @@
def handle_struct_rawffi(self, w_ffitype, w_structinstance):
"""
This method should be killed as soon as we remove support for _rawffi structures
-
+
w_structinstance: W_StructureInstance
"""
self.error(w_ffitype, w_structinstance)
@@ -349,7 +349,7 @@
def get_struct_rawffi(self, w_ffitype, w_structdescr):
"""
This should be killed as soon as we kill support for _rawffi structures
-
+
Return type: lltype.Unsigned
(the address of the structure)
"""
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -481,11 +481,13 @@
@unwrap_spec(w_groupnum=WrappedDefault(0))
def start_w(self, w_groupnum):
- return self.space.newint(self.do_span(w_groupnum)[0])
+ start, end = self.do_span(w_groupnum)
+ return self.space.newint(start)
@unwrap_spec(w_groupnum=WrappedDefault(0))
def end_w(self, w_groupnum):
- return self.space.newint(self.do_span(w_groupnum)[1])
+ start, end = self.do_span(w_groupnum)
+ return self.space.newint(end)
@unwrap_spec(w_groupnum=WrappedDefault(0))
def span_w(self, w_groupnum):
diff --git a/pypy/module/_sre/test/test_app_sre.py b/pypy/module/_sre/test/test_app_sre.py
--- a/pypy/module/_sre/test/test_app_sre.py
+++ b/pypy/module/_sre/test/test_app_sre.py
@@ -87,6 +87,14 @@
assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ def test_findall_unicode(self):
+ import re
+ assert [u"\u1234"] == re.findall(u"\u1234", u"\u1000\u1234\u2000")
+ assert ["a", "u"] == re.findall("b(.)", "abalbus")
+ assert [("a", "l"), ("u", "s")] == re.findall("b(.)(.)", "abalbus")
+ assert [("a", ""), ("s", "s")] == re.findall("b(a|(s))", "babs")
+ assert [u"xyz"] == re.findall(u".*yz", u"xyz")
+
def test_finditer(self):
import re
it = re.finditer("b(.)", "brabbel")
@@ -999,3 +1007,15 @@
import re
assert re.search(".+ab", "wowowowawoabwowo")
assert None == re.search(".+ab", "wowowaowowo")
+
+
+class AppTestUnicodeExtra:
+ def test_string_attribute(self):
+ import re
+ match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+ assert match.string == u"\u1233\u1234\u1235"
+
+ def test_match_start(self):
+ import re
+ match = re.search(u"\u1234", u"\u1233\u1234\u1235")
+ assert match.start() == 1
diff --git a/pypy/module/unicodedata/interp_ucd.py b/pypy/module/unicodedata/interp_ucd.py
--- a/pypy/module/unicodedata/interp_ucd.py
+++ b/pypy/module/unicodedata/interp_ucd.py
@@ -259,10 +259,10 @@
result[0] = ch
if not composed: # If decomposed normalization we are done
- return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+ return self.build(space, result, stop=j)
if j <= 1:
- return space.newunicode(u''.join([unichr(i) for i in result[:j]]))
+ return self.build(space, result, stop=j)
current = result[0]
starter_pos = 0
@@ -310,7 +310,10 @@
result[starter_pos] = current
- return space.newunicode(u''.join([unichr(i) for i in result[:next_insert]]))
+ return self.build(space, result, stop=next_insert)
+
+ def build(self, space, r, stop):
+ return space.newunicode(u''.join([unichr(i) for i in r[:stop]]))
methods = {}
diff --git a/pypy/objspace/std/test/test_unicodeobject.py b/pypy/objspace/std/test/test_unicodeobject.py
--- a/pypy/objspace/std/test/test_unicodeobject.py
+++ b/pypy/objspace/std/test/test_unicodeobject.py
@@ -135,6 +135,11 @@
check(u'a' + 'b', u'ab')
check('a' + u'b', u'ab')
+ def test_getitem(self):
+ assert u'abc'[2] == 'c'
+ raises(IndexError, u'abc'.__getitem__, 15)
+ assert u'g\u0105\u015b\u0107'[2] == u'\u015b'
+
def test_join(self):
def check(a, b):
assert a == b
@@ -171,6 +176,8 @@
assert u'\n\n'.splitlines() == [u'', u'']
assert u'a\nb\nc'.splitlines(1) == [u'a\n', u'b\n', u'c']
assert u'\na\nb\n'.splitlines(1) == [u'\n', u'a\n', u'b\n']
+ assert ((u'a' + '\xc2\x85'.decode('utf8') + u'b\n').splitlines() ==
+ ['a', 'b'])
def test_zfill(self):
assert u'123'.zfill(2) == u'123'
@@ -217,6 +224,7 @@
raises(ValueError, u'abc'.split, u'')
raises(ValueError, 'abc'.split, u'')
assert u' a b c d'.split(None, 0) == [u'a b c d']
+ assert u'a\nb\u1680c'.split() == [u'a', u'b', u'c']
def test_rsplit(self):
assert u"".rsplit() == []
@@ -246,6 +254,7 @@
raises(ValueError, 'abc'.rsplit, u'')
assert u' a b c '.rsplit(None, 0) == [u' a b c']
assert u''.rsplit('aaa') == [u'']
+ assert u'a\nb\u1680c'.rsplit() == [u'a', u'b', u'c']
def test_split_rsplit_str_unicode(self):
x = 'abc'.split(u'b')
@@ -291,6 +300,8 @@
assert u"bROWN fOX".title() == u"Brown Fox"
assert u"Brown Fox".title() == u"Brown Fox"
assert u"bro!wn fox".title() == u"Bro!Wn Fox"
+ assert u"brow\u4321n fox".title() == u"Brow\u4321N Fox"
+ assert u'\ud800'.title() == u'\ud800'
def test_istitle(self):
assert u"".istitle() == False
@@ -315,6 +326,18 @@
assert not u'\u01c5abc'.islower()
assert not u'\u01c5ABC'.isupper()
+ def test_lower_upper(self):
+ assert u'a'.lower() == u'a'
+ assert u'A'.lower() == u'a'
+ assert u'\u0105'.lower() == u'\u0105'
+ assert u'\u0104'.lower() == u'\u0105'
+ assert u'\ud800'.lower() == u'\ud800'
+ assert u'a'.upper() == u'A'
+ assert u'A'.upper() == u'A'
+ assert u'\u0105'.upper() == u'\u0104'
+ assert u'\u0104'.upper() == u'\u0104'
+ assert u'\ud800'.upper() == u'\ud800'
+
def test_capitalize(self):
assert u"brown fox".capitalize() == u"Brown fox"
assert u' hello '.capitalize() == u' hello '
@@ -336,6 +359,8 @@
# check with Ll chars with no upper - nothing changes here
assert (u'\u019b\u1d00\u1d86\u0221\u1fb7'.capitalize() ==
u'\u019b\u1d00\u1d86\u0221\u1fb7')
+ assert u'\ud800'.capitalize() == u'\ud800'
+ assert u'xx\ud800'.capitalize() == u'Xx\ud800'
def test_rjust(self):
s = u"abc"
@@ -376,6 +401,16 @@
assert u'one!two!three!'.replace('x', '@') == u'one!two!three!'
assert u'one!two!three!'.replace(u'x', '@', 2) == u'one!two!three!'
assert u'abc'.replace('', u'-') == u'-a-b-c-'
+ assert u'\u1234'.replace(u'', '-') == u'-\u1234-'
+ assert u'\u0234\u5678'.replace('', u'-') == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 0) == u'\u0234\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 1) == u'-\u0234\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 2) == u'-\u0234-\u5678'
+ assert u'\u0234\u5678'.replace('', u'-', 3) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 4) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', 700) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', -1) == u'-\u0234-\u5678-'
+ assert u'\u0234\u5678'.replace('', u'-', -42) == u'-\u0234-\u5678-'
assert u'abc'.replace(u'', u'-', 3) == u'-a-b-c'
assert u'abc'.replace('', '-', 0) == u'abc'
assert u''.replace(u'', '') == u''
@@ -479,6 +514,9 @@
assert u''.startswith(u'a') is False
assert u'x'.startswith(u'xx') is False
assert u'y'.startswith(u'xx') is False
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234') is True
+ assert u'\u1234\u5678\u4321'.startswith(u'\u1234\u4321') is False
+ assert u'\u1234'.startswith(u'', 1, 0) is True
def test_startswith_more(self):
assert u'ab'.startswith(u'a', 0) is True
@@ -589,7 +627,7 @@
raises(TypeError, u'hello'.translate)
raises(TypeError, u'abababc'.translate, {ord('a'):''})
- def test_unicode_form_encoded_object(self):
+ def test_unicode_from_encoded_object(self):
assert unicode('x', 'utf-8') == u'x'
assert unicode('x', 'utf-8', 'strict') == u'x'
@@ -634,6 +672,8 @@
assert u'\u20ac'.encode('utf-8') == '\xe2\x82\xac'
assert u'\ud800\udc02'.encode('utf-8') == '\xf0\x90\x80\x82'
assert u'\ud84d\udc56'.encode('utf-8') == '\xf0\xa3\x91\x96'
+ assert u'\ud800\udc02'.encode('uTf-8') == '\xf0\x90\x80\x82'
+ assert u'\ud84d\udc56'.encode('Utf8') == '\xf0\xa3\x91\x96'
assert u'\ud800'.encode('utf-8') == '\xed\xa0\x80'
assert u'\udc00'.encode('utf-8') == '\xed\xb0\x80'
assert (u'\ud800\udc02'*1000).encode('utf-8') == '\xf0\x90\x80\x82'*1000
@@ -745,6 +785,7 @@
def test_index(self):
assert u"rrarrrrrrrrra".index(u'a', 4, None) == 12
assert u"rrarrrrrrrrra".index(u'a', None, 6) == 2
+ assert u"\u1234\u4321\u5678".index(u'\u5678', 1) == 2
def test_rindex(self):
from sys import maxint
@@ -754,6 +795,7 @@
assert u'abcdefghiabc'.rindex(u'abc', 0, -1) == 0
assert u'abcdefghiabc'.rindex(u'abc', -4*maxint, 4*maxint) == 9
assert u'rrarrrrrrrrra'.rindex(u'a', 4, None) == 12
+ assert u"\u1234\u5678".rindex(u'\u5678') == 1
raises(ValueError, u'abcdefghiabc'.rindex, u'hib')
raises(ValueError, u'defghiabc'.rindex, u'def', 1)
@@ -768,12 +810,15 @@
assert u'abcdefghiabc'.rfind(u'') == 12
assert u'abcdefghiabc'.rfind(u'abcd') == 0
assert u'abcdefghiabc'.rfind(u'abcz') == -1
+ assert u"\u1234\u5678".rfind(u'\u5678') == 1
def test_rfind_corner_case(self):
assert u'abc'.rfind('', 4) == -1
def test_find_index_str_unicode(self):
- assert 'abcdefghiabc'.find(u'bc') == 1
+ assert u'abcdefghiabc'.find(u'bc') == 1
+ assert u'ab\u0105b\u0107'.find('b', 2) == 3
+ assert u'ab\u0105b\u0107'.find('b', 0, 1) == -1
assert 'abcdefghiabc'.rfind(u'abc') == 9
raises(UnicodeDecodeError, '\x80'.find, u'')
raises(UnicodeDecodeError, '\x80'.rfind, u'')
@@ -781,6 +826,7 @@
assert 'abcdefghiabc'.rindex(u'abc') == 9
raises(UnicodeDecodeError, '\x80'.index, u'')
raises(UnicodeDecodeError, '\x80'.rindex, u'')
+ assert u"\u1234\u5678".find(u'\u5678') == 1
def test_count(self):
assert u"".count(u"x") ==0
@@ -807,6 +853,7 @@
def test_swapcase(self):
assert u'\xe4\xc4\xdf'.swapcase() == u'\xc4\xe4\xdf'
+ assert u'\ud800'.swapcase() == u'\ud800'
def test_buffer(self):
buf = buffer(u'XY')
@@ -878,16 +925,31 @@
def test_getslice(self):
assert u'123456'.__getslice__(1, 5) == u'2345'
- s = u"abc"
- assert s[:] == "abc"
- assert s[1:] == "bc"
- assert s[:2] == "ab"
- assert s[1:2] == "b"
- assert s[-2:] == "bc"
- assert s[:-1] == "ab"
- assert s[-2:2] == "b"
- assert s[1:-1] == "b"
- assert s[-2:-1] == "b"
+ s = u"\u0105b\u0107"
+ assert s[:] == u"\u0105b\u0107"
+ assert s[1:] == u"b\u0107"
+ assert s[:2] == u"\u0105b"
+ assert s[1:2] == u"b"
+ assert s[-2:] == u"b\u0107"
+ assert s[:-1] == u"\u0105b"
+ assert s[-2:2] == u"b"
+ assert s[1:-1] == u"b"
+ assert s[-2:-1] == u"b"
+
+ def test_getitem_slice(self):
+ assert u'123456'.__getitem__(slice(1, 5)) == u'2345'
+ s = u"\u0105b\u0107"
+ assert s[slice(3)] == u"\u0105b\u0107"
+ assert s[slice(1, 3)] == u"b\u0107"
+ assert s[slice(2)] == u"\u0105b"
+ assert s[slice(1,2)] == u"b"
+ assert s[slice(-2,3)] == u"b\u0107"
+ assert s[slice(-1)] == u"\u0105b"
+ assert s[slice(-2,2)] == u"b"
+ assert s[slice(1,-1)] == u"b"
+ assert s[slice(-2,-1)] == u"b"
+ assert u"abcde"[::2] == u"ace"
+ assert u"\u0105\u0106\u0107abcd"[::2] == u"\u0105\u0107bd"
def test_no_len_on_str_iter(self):
iterable = u"hello"
More information about the pypy-commit
mailing list