[pypy-svn] pypy 32ptr-on-64bit: hg merge default
arigo
commits-noreply at bitbucket.org
Sat Apr 16 22:12:12 CEST 2011
Author: Armin Rigo <arigo at tunes.org>
Branch: 32ptr-on-64bit
Changeset: r43409:78429cb225cf
Date: 2011-04-16 22:11 +0200
http://bitbucket.org/pypy/pypy/changeset/78429cb225cf/
Log: hg merge default
diff --git a/pypy/rlib/rsre/test/test_zjit.py b/pypy/rlib/rsre/test/test_zjit.py
--- a/pypy/rlib/rsre/test/test_zjit.py
+++ b/pypy/rlib/rsre/test/test_zjit.py
@@ -1,5 +1,5 @@
import py
-from pypy.jit.metainterp.test import test_basic
+from pypy.jit.metainterp.test import support
from pypy.rlib.nonconst import NonConstant
from pypy.rlib.rsre.test.test_match import get_code
from pypy.rlib.rsre import rsre_core
@@ -45,7 +45,7 @@
assert m._jit_unroll_safe_
-class TestJitRSre(test_basic.LLJitMixin):
+class TestJitRSre(support.LLJitMixin):
def meta_interp_match(self, pattern, string, repeat=1):
r = get_code(pattern)
diff --git a/pypy/rlib/test/test_runicode.py b/pypy/rlib/test/test_runicode.py
--- a/pypy/rlib/test/test_runicode.py
+++ b/pypy/rlib/test/test_runicode.py
@@ -66,9 +66,10 @@
assert called[0]
assert "42424242" in result
- def checkdecodeerror(self, s, encoding, start, stop, addstuff=True):
+ def checkdecodeerror(self, s, encoding, start, stop,
+ addstuff=True, msg=None):
called = [0]
- def errorhandler(errors, enc, msg, t, startingpos,
+ def errorhandler(errors, enc, errmsg, t, startingpos,
endingpos):
called[0] += 1
if called[0] == 1:
@@ -77,6 +78,8 @@
assert t is s
assert start == startingpos
assert stop == endingpos
+ if msg is not None:
+ assert errmsg == msg
return u"42424242", stop
return u"", endingpos
decoder = self.getdecoder(encoding)
@@ -90,7 +93,7 @@
class TestDecoding(UnicodeTests):
-
+
# XXX test bom recognition in utf-16
# XXX test proper error handling
@@ -131,6 +134,96 @@
"utf-32 utf-32-be utf-32-le").split():
self.checkdecode(uni, encoding)
+ def test_ascii_error(self):
+ self.checkdecodeerror("abc\xFF\xFF\xFFcde", "ascii", 3, 4)
+
+ def test_utf16_errors(self):
+ # truncated BOM
+ for s in ["\xff", "\xfe"]:
+ self.checkdecodeerror(s, "utf-16", 0, len(s), addstuff=False)
+
+ for s in [
+ # unexpected end of data ascii
+ "\xff\xfeF",
+ # unexpected end of data
+ '\xff\xfe\xc0\xdb\x00', '\xff\xfe\xc0\xdb', '\xff\xfe\xc0',
+ ]:
+ self.checkdecodeerror(s, "utf-16", 2, len(s), addstuff=False)
+ for s in [
+ # illegal surrogate
+ "\xff\xfe\xff\xdb\xff\xff",
+ ]:
+ self.checkdecodeerror(s, "utf-16", 2, 4, addstuff=False)
+
+ def test_utf16_bugs(self):
+ s = '\x80-\xe9\xdeL\xa3\x9b'
+ py.test.raises(UnicodeDecodeError, runicode.str_decode_utf_16_le,
+ s, len(s), True)
+
+ def test_utf7_bugs(self):
+ u = u'A\u2262\u0391.'
+ assert runicode.unicode_encode_utf_7(u, len(u), None) == 'A+ImIDkQ.'
+
+ def test_utf7_tofrom_utf8_bug(self):
+ def _assert_decu7(input, expected):
+ assert runicode.str_decode_utf_7(input, len(input), None) == (expected, len(input))
+
+ _assert_decu7('+-', u'+')
+ _assert_decu7('+-+-', u'++')
+ _assert_decu7('+-+AOQ-', u'+\xe4')
+ _assert_decu7('+AOQ-', u'\xe4')
+ _assert_decu7('+AOQ-', u'\xe4')
+ _assert_decu7('+AOQ- ', u'\xe4 ')
+ _assert_decu7(' +AOQ-', u' \xe4')
+ _assert_decu7(' +AOQ- ', u' \xe4 ')
+ _assert_decu7('+AOQ-+AOQ-', u'\xe4\xe4')
+
+ s_utf7 = 'Die M+AOQ-nner +AOQ-rgen sich!'
+ s_utf8 = u'Die Männer ärgen sich!'
+ s_utf8_esc = u'Die M\xe4nner \xe4rgen sich!'
+
+ _assert_decu7(s_utf7, s_utf8_esc)
+ _assert_decu7(s_utf7, s_utf8)
+
+ assert runicode.unicode_encode_utf_7(s_utf8_esc, len(s_utf8_esc), None) == s_utf7
+ assert runicode.unicode_encode_utf_7(s_utf8, len(s_utf8_esc), None) == s_utf7
+
+ def test_utf7_partial(self):
+ s = u"a+-b".encode('utf-7')
+ assert s == "a+--b"
+ decode = self.getdecoder('utf-7')
+ assert decode(s, 1, None) == (u'a', 1)
+ assert decode(s, 2, None) == (u'a', 1)
+ assert decode(s, 3, None) == (u'a+', 3)
+ assert decode(s, 4, None) == (u'a+-', 4)
+ assert decode(s, 5, None) == (u'a+-b', 5)
+
+ def test_utf7_surrogates(self):
+ encode = self.getencoder('utf-7')
+ u = u'\U000abcde'
+ assert encode(u, len(u), None) == '+2m/c3g-'
+ decode = self.getdecoder('utf-7')
+ s = '+3ADYAA-'
+ raises(UnicodeError, decode, s, len(s), None)
+ def replace_handler(errors, codec, message, input, start, end):
+ return u'?', end
+ assert decode(s, len(s), None, final=True,
+ errorhandler = replace_handler) == (u'??', len(s))
+
+
+class TestUTF8Decoding(UnicodeTests):
+ def __init__(self):
+ self.decoder = self.getdecoder('utf-8')
+
+ def replace_handler(self, errors, codec, message, input, start, end):
+ return u'\ufffd', end
+
+ def ignore_handler(self, errors, codec, message, input, start, end):
+ return u'', end
+
+ def to_bytestring(self, bytes):
+ return ''.join(chr(int(c, 16)) for c in bytes.split())
+
def test_single_chars_utf8(self):
for s in ["\xd7\x90", "\xd6\x96", "\xeb\x96\x95", "\xf0\x90\x91\x93"]:
self.checkdecode(s, "utf-8")
@@ -140,30 +233,297 @@
# This test will raise an error with python 3.x
self.checkdecode(u"\ud800", "utf-8")
+ def test_invalid_start_byte(self):
+ """
+ Test that an 'invalid start byte' error is raised when the first byte
+ is not in the ASCII range or is not a valid start byte of a 2-, 3-, or
+ 4-bytes sequence. The invalid start byte is replaced with a single
+ U+FFFD when errors='replace'.
+ E.g. <80> is a continuation byte and can appear only after a start byte.
+ """
+ FFFD = u'\ufffd'
+ for byte in '\x80\xA0\x9F\xBF\xC0\xC1\xF5\xFF':
+ raises(UnicodeDecodeError, self.decoder, byte, 1, None, final=True)
+ self.checkdecodeerror(byte, 'utf-8', 0, 1, addstuff=False,
+ msg='invalid start byte')
+ assert self.decoder(byte, 1, None, final=True,
+ errorhandler=self.replace_handler) == (FFFD, 1)
+ assert (self.decoder('aaaa' + byte + 'bbbb', 9, None,
+ final=True, errorhandler=self.replace_handler) ==
+ (u'aaaa'+ FFFD + u'bbbb', 9))
+ assert self.decoder(byte, 1, None, final=True,
+ errorhandler=self.ignore_handler) == (u'', 1)
+ assert (self.decoder('aaaa' + byte + 'bbbb', 9, None,
+ final=True, errorhandler=self.ignore_handler) ==
+ (u'aaaabbbb', 9))
+
+ def test_unexpected_end_of_data(self):
+ """
+ Test that an 'unexpected end of data' error is raised when the string
+ ends after a start byte of a 2-, 3-, or 4-bytes sequence without having
+ enough continuation bytes. The incomplete sequence is replaced with a
+ single U+FFFD when errors='replace'.
+ E.g. in the sequence <F3 80 80>, F3 is the start byte of a 4-bytes
+ sequence, but it's followed by only 2 valid continuation bytes and the
+ last continuation byte is missing.
+ Note: the continuation bytes must be all valid, if one of them is
+ invalid another error will be raised.
+ """
+ sequences = [
+ 'C2', 'DF',
+ 'E0 A0', 'E0 BF', 'E1 80', 'E1 BF', 'EC 80', 'EC BF',
+ 'ED 80', 'ED 9F', 'EE 80', 'EE BF', 'EF 80', 'EF BF',
+ 'F0 90', 'F0 BF', 'F0 90 80', 'F0 90 BF', 'F0 BF 80', 'F0 BF BF',
+ 'F1 80', 'F1 BF', 'F1 80 80', 'F1 80 BF', 'F1 BF 80', 'F1 BF BF',
+ 'F3 80', 'F3 BF', 'F3 80 80', 'F3 80 BF', 'F3 BF 80', 'F3 BF BF',
+ 'F4 80', 'F4 8F', 'F4 80 80', 'F4 80 BF', 'F4 8F 80', 'F4 8F BF'
+ ]
+ FFFD = u'\ufffd'
+ for seq in sequences:
+ seq = self.to_bytestring(seq)
+ raises(UnicodeDecodeError, self.decoder, seq, len(seq),
+ None, final=True)
+ self.checkdecodeerror(seq, 'utf-8', 0, len(seq), addstuff=False,
+ msg='unexpected end of data')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.replace_handler) == (FFFD, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.replace_handler) ==
+ (u'aaaa'+ FFFD + u'bbbb', len(seq) + 8))
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.ignore_handler) == (u'', len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.ignore_handler) ==
+ (u'aaaabbbb', len(seq) + 8))
+
+ def test_invalid_cb_for_2bytes_seq(self):
+ """
+ Test that an 'invalid continuation byte' error is raised when the
+ continuation byte of a 2-bytes sequence is invalid. The start byte
+ is replaced by a single U+FFFD and the second byte is handled
+ separately when errors='replace'.
+ E.g. in the sequence <C2 41>, C2 is the start byte of a 2-bytes
+ sequence, but 41 is not a valid continuation byte because it's the
+ ASCII letter 'A'.
+ """
+ FFFD = u'\ufffd'
+ FFFDx2 = FFFD * 2
+ sequences = [
+ ('C2 00', FFFD+u'\x00'), ('C2 7F', FFFD+u'\x7f'),
+ ('C2 C0', FFFDx2), ('C2 FF', FFFDx2),
+ ('DF 00', FFFD+u'\x00'), ('DF 7F', FFFD+u'\x7f'),
+ ('DF C0', FFFDx2), ('DF FF', FFFDx2),
+ ]
+ for seq, res in sequences:
+ seq = self.to_bytestring(seq)
+ raises(UnicodeDecodeError, self.decoder, seq, len(seq),
+ None, final=True)
+ self.checkdecodeerror(seq, 'utf-8', 0, 1, addstuff=False,
+ msg='invalid continuation byte')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.replace_handler) == (res, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.replace_handler) ==
+ (u'aaaa' + res + u'bbbb', len(seq) + 8))
+ res = res.replace(FFFD, u'')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.ignore_handler) == (res, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.ignore_handler) ==
+ (u'aaaa' + res + u'bbbb', len(seq) + 8))
+
+ def test_invalid_cb_for_3bytes_seq(self):
+ """
+ Test that an 'invalid continuation byte' error is raised when the
+ continuation byte(s) of a 3-bytes sequence are invalid. When
+ errors='replace', if the first continuation byte is valid, the first
+ two bytes (start byte + 1st cb) are replaced by a single U+FFFD and the
+ third byte is handled separately, otherwise only the start byte is
+ replaced with a U+FFFD and the other continuation bytes are handled
+ separately.
+ E.g. in the sequence <E1 80 41>, E1 is the start byte of a 3-bytes
+ sequence, 80 is a valid continuation byte, but 41 is not a valid cb
+ because it's the ASCII letter 'A'.
+ Note: when the start byte is E0 or ED, the valid ranges for the first
+ continuation byte are limited to A0..BF and 80..9F respectively.
+ However, when the start byte is ED, Python 2 considers all the bytes
+ in range 80..BF valid. This is fixed in Python 3.
+ """
+ FFFD = u'\ufffd'
+ FFFDx2 = FFFD * 2
+ sequences = [
+ ('E0 00', FFFD+u'\x00'), ('E0 7F', FFFD+u'\x7f'), ('E0 80', FFFDx2),
+ ('E0 9F', FFFDx2), ('E0 C0', FFFDx2), ('E0 FF', FFFDx2),
+ ('E0 A0 00', FFFD+u'\x00'), ('E0 A0 7F', FFFD+u'\x7f'),
+ ('E0 A0 C0', FFFDx2), ('E0 A0 FF', FFFDx2),
+ ('E0 BF 00', FFFD+u'\x00'), ('E0 BF 7F', FFFD+u'\x7f'),
+ ('E0 BF C0', FFFDx2), ('E0 BF FF', FFFDx2), ('E1 00', FFFD+u'\x00'),
+ ('E1 7F', FFFD+u'\x7f'), ('E1 C0', FFFDx2), ('E1 FF', FFFDx2),
+ ('E1 80 00', FFFD+u'\x00'), ('E1 80 7F', FFFD+u'\x7f'),
+ ('E1 80 C0', FFFDx2), ('E1 80 FF', FFFDx2),
+ ('E1 BF 00', FFFD+u'\x00'), ('E1 BF 7F', FFFD+u'\x7f'),
+ ('E1 BF C0', FFFDx2), ('E1 BF FF', FFFDx2), ('EC 00', FFFD+u'\x00'),
+ ('EC 7F', FFFD+u'\x7f'), ('EC C0', FFFDx2), ('EC FF', FFFDx2),
+ ('EC 80 00', FFFD+u'\x00'), ('EC 80 7F', FFFD+u'\x7f'),
+ ('EC 80 C0', FFFDx2), ('EC 80 FF', FFFDx2),
+ ('EC BF 00', FFFD+u'\x00'), ('EC BF 7F', FFFD+u'\x7f'),
+ ('EC BF C0', FFFDx2), ('EC BF FF', FFFDx2), ('ED 00', FFFD+u'\x00'),
+ ('ED 7F', FFFD+u'\x7f'),
+ # ('ED A0', FFFDx2), ('ED BF', FFFDx2), # see note ^
+ ('ED C0', FFFDx2), ('ED FF', FFFDx2), ('ED 80 00', FFFD+u'\x00'),
+ ('ED 80 7F', FFFD+u'\x7f'), ('ED 80 C0', FFFDx2),
+ ('ED 80 FF', FFFDx2), ('ED 9F 00', FFFD+u'\x00'),
+ ('ED 9F 7F', FFFD+u'\x7f'), ('ED 9F C0', FFFDx2),
+ ('ED 9F FF', FFFDx2), ('EE 00', FFFD+u'\x00'),
+ ('EE 7F', FFFD+u'\x7f'), ('EE C0', FFFDx2), ('EE FF', FFFDx2),
+ ('EE 80 00', FFFD+u'\x00'), ('EE 80 7F', FFFD+u'\x7f'),
+ ('EE 80 C0', FFFDx2), ('EE 80 FF', FFFDx2),
+ ('EE BF 00', FFFD+u'\x00'), ('EE BF 7F', FFFD+u'\x7f'),
+ ('EE BF C0', FFFDx2), ('EE BF FF', FFFDx2), ('EF 00', FFFD+u'\x00'),
+ ('EF 7F', FFFD+u'\x7f'), ('EF C0', FFFDx2), ('EF FF', FFFDx2),
+ ('EF 80 00', FFFD+u'\x00'), ('EF 80 7F', FFFD+u'\x7f'),
+ ('EF 80 C0', FFFDx2), ('EF 80 FF', FFFDx2),
+ ('EF BF 00', FFFD+u'\x00'), ('EF BF 7F', FFFD+u'\x7f'),
+ ('EF BF C0', FFFDx2), ('EF BF FF', FFFDx2),
+ ]
+ for seq, res in sequences:
+ seq = self.to_bytestring(seq)
+ raises(UnicodeDecodeError, self.decoder, seq, len(seq),
+ None, final=True)
+ self.checkdecodeerror(seq, 'utf-8', 0, len(seq)-1, addstuff=False,
+ msg='invalid continuation byte')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.replace_handler) == (res, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.replace_handler) ==
+ (u'aaaa' + res + u'bbbb', len(seq) + 8))
+ res = res.replace(FFFD, u'')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.ignore_handler) == (res, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.ignore_handler) ==
+ (u'aaaa' + res + u'bbbb', len(seq) + 8))
+
+ def test_invalid_cb_for_4bytes_seq(self):
+ """
+ Test that an 'invalid continuation byte' error is raised when the
+ continuation byte(s) of a 4-bytes sequence are invalid. When
+ errors='replace', the start byte and all the following valid
+ continuation bytes are replaced with a single U+FFFD, and all the bytes
+ starting from the first invalid continuation bytes (included) are
+ handled separately.
+ E.g. in the sequence <F1 80 41>, F1 is the start byte of a 4-bytes
+ sequence, 80 is a valid continuation byte, but 41 is not a valid cb
+ because it's the ASCII letter 'A'.
+ Note: when the start byte is F0 or F4, the valid ranges for the first
+ continuation byte are limited to 90..BF and 80..8F respectively.
+ Unlike the ED case for 3-bytes sequences (where Python 2 accepts all
+ the bytes in range 80..BF), both of these limits are enforced.
+ """
+ FFFD = u'\ufffd'
+ FFFDx2 = FFFD * 2
+ sequences = [
+ ('F0 00', FFFD+u'\x00'), ('F0 7F', FFFD+u'\x7f'), ('F0 80', FFFDx2),
+ ('F0 8F', FFFDx2), ('F0 C0', FFFDx2), ('F0 FF', FFFDx2),
+ ('F0 90 00', FFFD+u'\x00'), ('F0 90 7F', FFFD+u'\x7f'),
+ ('F0 90 C0', FFFDx2), ('F0 90 FF', FFFDx2),
+ ('F0 BF 00', FFFD+u'\x00'), ('F0 BF 7F', FFFD+u'\x7f'),
+ ('F0 BF C0', FFFDx2), ('F0 BF FF', FFFDx2),
+ ('F0 90 80 00', FFFD+u'\x00'), ('F0 90 80 7F', FFFD+u'\x7f'),
+ ('F0 90 80 C0', FFFDx2), ('F0 90 80 FF', FFFDx2),
+ ('F0 90 BF 00', FFFD+u'\x00'), ('F0 90 BF 7F', FFFD+u'\x7f'),
+ ('F0 90 BF C0', FFFDx2), ('F0 90 BF FF', FFFDx2),
+ ('F0 BF 80 00', FFFD+u'\x00'), ('F0 BF 80 7F', FFFD+u'\x7f'),
+ ('F0 BF 80 C0', FFFDx2), ('F0 BF 80 FF', FFFDx2),
+ ('F0 BF BF 00', FFFD+u'\x00'), ('F0 BF BF 7F', FFFD+u'\x7f'),
+ ('F0 BF BF C0', FFFDx2), ('F0 BF BF FF', FFFDx2),
+ ('F1 00', FFFD+u'\x00'), ('F1 7F', FFFD+u'\x7f'), ('F1 C0', FFFDx2),
+ ('F1 FF', FFFDx2), ('F1 80 00', FFFD+u'\x00'),
+ ('F1 80 7F', FFFD+u'\x7f'), ('F1 80 C0', FFFDx2),
+ ('F1 80 FF', FFFDx2), ('F1 BF 00', FFFD+u'\x00'),
+ ('F1 BF 7F', FFFD+u'\x7f'), ('F1 BF C0', FFFDx2),
+ ('F1 BF FF', FFFDx2), ('F1 80 80 00', FFFD+u'\x00'),
+ ('F1 80 80 7F', FFFD+u'\x7f'), ('F1 80 80 C0', FFFDx2),
+ ('F1 80 80 FF', FFFDx2), ('F1 80 BF 00', FFFD+u'\x00'),
+ ('F1 80 BF 7F', FFFD+u'\x7f'), ('F1 80 BF C0', FFFDx2),
+ ('F1 80 BF FF', FFFDx2), ('F1 BF 80 00', FFFD+u'\x00'),
+ ('F1 BF 80 7F', FFFD+u'\x7f'), ('F1 BF 80 C0', FFFDx2),
+ ('F1 BF 80 FF', FFFDx2), ('F1 BF BF 00', FFFD+u'\x00'),
+ ('F1 BF BF 7F', FFFD+u'\x7f'), ('F1 BF BF C0', FFFDx2),
+ ('F1 BF BF FF', FFFDx2), ('F3 00', FFFD+u'\x00'),
+ ('F3 7F', FFFD+u'\x7f'), ('F3 C0', FFFDx2), ('F3 FF', FFFDx2),
+ ('F3 80 00', FFFD+u'\x00'), ('F3 80 7F', FFFD+u'\x7f'),
+ ('F3 80 C0', FFFDx2), ('F3 80 FF', FFFDx2),
+ ('F3 BF 00', FFFD+u'\x00'), ('F3 BF 7F', FFFD+u'\x7f'),
+ ('F3 BF C0', FFFDx2), ('F3 BF FF', FFFDx2),
+ ('F3 80 80 00', FFFD+u'\x00'), ('F3 80 80 7F', FFFD+u'\x7f'),
+ ('F3 80 80 C0', FFFDx2), ('F3 80 80 FF', FFFDx2),
+ ('F3 80 BF 00', FFFD+u'\x00'), ('F3 80 BF 7F', FFFD+u'\x7f'),
+ ('F3 80 BF C0', FFFDx2), ('F3 80 BF FF', FFFDx2),
+ ('F3 BF 80 00', FFFD+u'\x00'), ('F3 BF 80 7F', FFFD+u'\x7f'),
+ ('F3 BF 80 C0', FFFDx2), ('F3 BF 80 FF', FFFDx2),
+ ('F3 BF BF 00', FFFD+u'\x00'), ('F3 BF BF 7F', FFFD+u'\x7f'),
+ ('F3 BF BF C0', FFFDx2), ('F3 BF BF FF', FFFDx2),
+ ('F4 00', FFFD+u'\x00'), ('F4 7F', FFFD+u'\x7f'), ('F4 90', FFFDx2),
+ ('F4 BF', FFFDx2), ('F4 C0', FFFDx2), ('F4 FF', FFFDx2),
+ ('F4 80 00', FFFD+u'\x00'), ('F4 80 7F', FFFD+u'\x7f'),
+ ('F4 80 C0', FFFDx2), ('F4 80 FF', FFFDx2),
+ ('F4 8F 00', FFFD+u'\x00'), ('F4 8F 7F', FFFD+u'\x7f'),
+ ('F4 8F C0', FFFDx2), ('F4 8F FF', FFFDx2),
+ ('F4 80 80 00', FFFD+u'\x00'), ('F4 80 80 7F', FFFD+u'\x7f'),
+ ('F4 80 80 C0', FFFDx2), ('F4 80 80 FF', FFFDx2),
+ ('F4 80 BF 00', FFFD+u'\x00'), ('F4 80 BF 7F', FFFD+u'\x7f'),
+ ('F4 80 BF C0', FFFDx2), ('F4 80 BF FF', FFFDx2),
+ ('F4 8F 80 00', FFFD+u'\x00'), ('F4 8F 80 7F', FFFD+u'\x7f'),
+ ('F4 8F 80 C0', FFFDx2), ('F4 8F 80 FF', FFFDx2),
+ ('F4 8F BF 00', FFFD+u'\x00'), ('F4 8F BF 7F', FFFD+u'\x7f'),
+ ('F4 8F BF C0', FFFDx2), ('F4 8F BF FF', FFFDx2)
+ ]
+ for seq, res in sequences:
+ seq = self.to_bytestring(seq)
+ raises(UnicodeDecodeError, self.decoder, seq, len(seq),
+ None, final=True)
+ self.checkdecodeerror(seq, 'utf-8', 0, len(seq)-1, addstuff=False,
+ msg='invalid continuation byte')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.replace_handler) == (res, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.replace_handler) ==
+ (u'aaaa' + res + u'bbbb', len(seq) + 8))
+ res = res.replace(FFFD, u'')
+ assert self.decoder(seq, len(seq), None, final=True,
+ errorhandler=self.ignore_handler) == (res, len(seq))
+ assert (self.decoder('aaaa' + seq + 'bbbb', len(seq) + 8, None,
+ final=True, errorhandler=self.ignore_handler) ==
+ (u'aaaa' + res + u'bbbb', len(seq) + 8))
+
def test_utf8_errors(self):
- for s in [# unexpected end of data
- "\xd7", "\xd6", "\xeb\x96", "\xf0\x90\x91"]:
- self.checkdecodeerror(s, "utf-8", 0, len(s), addstuff=False)
-
- # unexpected code byte
- for s in ["\x81", "\xbf"]:
- self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True)
+ # unexpected end of data
+ for s in ['\xd7', '\xd6', '\xeb\x96', '\xf0\x90\x91', '\xc2', '\xdf']:
+ self.checkdecodeerror(s, 'utf-8', 0, len(s), addstuff=False,
+ msg='unexpected end of data')
# invalid data 2 byte
for s in ["\xd7\x50", "\xd6\x06", "\xd6\xD6"]:
- self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True,
+ msg='invalid continuation byte')
# invalid data 3 byte
for s in ["\xeb\x56\x95", "\xeb\x06\x95", "\xeb\xD6\x95"]:
- self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True,
+ msg='invalid continuation byte')
for s in ["\xeb\x96\x55", "\xeb\x96\x05", "\xeb\x96\xD5"]:
- self.checkdecodeerror(s, "utf-8", 0, 2, addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, 2, addstuff=True,
+ msg='invalid continuation byte')
# invalid data 4 byte
for s in ["\xf0\x50\x91\x93", "\xf0\x00\x91\x93", "\xf0\xd0\x91\x93"]:
- self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, 1, addstuff=True,
+ msg='invalid continuation byte')
for s in ["\xf0\x90\x51\x93", "\xf0\x90\x01\x93", "\xf0\x90\xd1\x93"]:
- self.checkdecodeerror(s, "utf-8", 0, 2, addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, 2, addstuff=True,
+ msg='invalid continuation byte')
for s in ["\xf0\x90\x91\x53", "\xf0\x90\x91\x03", "\xf0\x90\x91\xd3"]:
- self.checkdecodeerror(s, "utf-8", 0, 3, addstuff=True)
+ self.checkdecodeerror(s, "utf-8", 0, 3, addstuff=True,
+ msg='invalid continuation byte')
def test_issue8271(self):
@@ -249,97 +609,18 @@
('\x61\xF1\x80\x80\xE1\x80\xC2\x62\x80\x63\x80\xBF\x64',
u'\x61\uFFFD\uFFFD\uFFFD\x62\uFFFD\x63\uFFFD\uFFFD\x64'),
]
- def replace_handler(errors, codec, message, input, start, end):
- return FFFD, end
- def ignore_handler(errors, codec, message, input, start, end):
- return u'', end
+
for n, (seq, res) in enumerate(sequences):
decoder = self.getdecoder('utf-8')
raises(UnicodeDecodeError, decoder, seq, len(seq), None, final=True)
assert decoder(seq, len(seq), None, final=True,
- errorhandler=replace_handler) == (res, len(seq))
+ errorhandler=self.replace_handler) == (res, len(seq))
assert decoder(seq + 'b', len(seq) + 1, None, final=True,
- errorhandler=replace_handler) == (res + u'b',
- len(seq) + 1)
+ errorhandler=self.replace_handler) == (res + u'b',
+ len(seq) + 1)
res = res.replace(FFFD, u'')
assert decoder(seq, len(seq), None, final=True,
- errorhandler=ignore_handler) == (res, len(seq))
-
- def test_ascii_error(self):
- self.checkdecodeerror("abc\xFF\xFF\xFFcde", "ascii", 3, 4)
-
- def test_utf16_errors(self):
- # trunkated BOM
- for s in ["\xff", "\xfe"]:
- self.checkdecodeerror(s, "utf-16", 0, len(s), addstuff=False)
-
- for s in [
- # unexpected end of data ascii
- "\xff\xfeF",
- # unexpected end of data
- '\xff\xfe\xc0\xdb\x00', '\xff\xfe\xc0\xdb', '\xff\xfe\xc0',
- ]:
- self.checkdecodeerror(s, "utf-16", 2, len(s), addstuff=False)
- for s in [
- # illegal surrogate
- "\xff\xfe\xff\xdb\xff\xff",
- ]:
- self.checkdecodeerror(s, "utf-16", 2, 4, addstuff=False)
-
- def test_utf16_bugs(self):
- s = '\x80-\xe9\xdeL\xa3\x9b'
- py.test.raises(UnicodeDecodeError, runicode.str_decode_utf_16_le,
- s, len(s), True)
-
- def test_utf7_bugs(self):
- u = u'A\u2262\u0391.'
- assert runicode.unicode_encode_utf_7(u, len(u), None) == 'A+ImIDkQ.'
-
- def test_utf7_tofrom_utf8_bug(self):
- def _assert_decu7(input, expected):
- assert runicode.str_decode_utf_7(input, len(input), None) == (expected, len(input))
-
- _assert_decu7('+-', u'+')
- _assert_decu7('+-+-', u'++')
- _assert_decu7('+-+AOQ-', u'+\xe4')
- _assert_decu7('+AOQ-', u'\xe4')
- _assert_decu7('+AOQ-', u'\xe4')
- _assert_decu7('+AOQ- ', u'\xe4 ')
- _assert_decu7(' +AOQ-', u' \xe4')
- _assert_decu7(' +AOQ- ', u' \xe4 ')
- _assert_decu7('+AOQ-+AOQ-', u'\xe4\xe4')
-
- s_utf7 = 'Die M+AOQ-nner +AOQ-rgen sich!'
- s_utf8 = u'Die Männer ärgen sich!'
- s_utf8_esc = u'Die M\xe4nner \xe4rgen sich!'
-
- _assert_decu7(s_utf7, s_utf8_esc)
- _assert_decu7(s_utf7, s_utf8)
-
- assert runicode.unicode_encode_utf_7(s_utf8_esc, len(s_utf8_esc), None) == s_utf7
- assert runicode.unicode_encode_utf_7(s_utf8, len(s_utf8_esc), None) == s_utf7
-
- def test_utf7_partial(self):
- s = u"a+-b".encode('utf-7')
- assert s == "a+--b"
- decode = self.getdecoder('utf-7')
- assert decode(s, 1, None) == (u'a', 1)
- assert decode(s, 2, None) == (u'a', 1)
- assert decode(s, 3, None) == (u'a+', 3)
- assert decode(s, 4, None) == (u'a+-', 4)
- assert decode(s, 5, None) == (u'a+-b', 5)
-
- def test_utf7_surrogates(self):
- encode = self.getencoder('utf-7')
- u = u'\U000abcde'
- assert encode(u, len(u), None) == '+2m/c3g-'
- decode = self.getdecoder('utf-7')
- s = '+3ADYAA-'
- raises(UnicodeError, decode, s, len(s), None)
- def replace_handler(errors, codec, message, input, start, end):
- return u'?', end
- assert decode(s, len(s), None, final=True,
- errorhandler = replace_handler) == (u'??', len(s))
+ errorhandler=self.ignore_handler) == (res, len(seq))
class TestEncoding(UnicodeTests):
@@ -376,7 +657,7 @@
self.checkencode(uni, "utf-7")
for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
"utf-32 utf-32-be utf-32-le").split():
- self.checkencode(uni, encoding)
+ self.checkencode(uni, encoding)
def test_maxunicode(self):
uni = unichr(sys.maxunicode)
@@ -384,7 +665,7 @@
self.checkencode(uni, "utf-7")
for encoding in ("utf-8 utf-16 utf-16-be utf-16-le "
"utf-32 utf-32-be utf-32-le").split():
- self.checkencode(uni, encoding)
+ self.checkencode(uni, encoding)
def test_single_chars_utf8(self):
# check every number of bytes per char
@@ -394,7 +675,7 @@
def test_utf8_surrogates(self):
# check replacing of two surrogates by single char while encoding
# make sure that the string itself is not marshalled
- u = u"\ud800"
+ u = u"\ud800"
for i in range(4):
u += u"\udc00"
self.checkencode(u, "utf-8")
@@ -422,7 +703,7 @@
def test_utf8(self):
from pypy.rpython.test.test_llinterp import interpret
def f(x):
-
+
s1 = "".join(["\xd7\x90\xd6\x96\xeb\x96\x95\xf0\x90\x91\x93"] * x)
u, consumed = runicode.str_decode_utf_8(s1, len(s1), True)
s2 = runicode.unicode_encode_utf_8(u, len(u), True)
@@ -438,6 +719,6 @@
u = runicode.UNICHR(x)
t = runicode.ORD(u)
return t
-
+
res = interpret(f, [0x10140])
assert res == 0x10140
diff --git a/pypy/rpython/lltypesystem/lltype.py b/pypy/rpython/lltypesystem/lltype.py
--- a/pypy/rpython/lltypesystem/lltype.py
+++ b/pypy/rpython/lltypesystem/lltype.py
@@ -95,6 +95,8 @@
__slots__ = ['__dict__', '__cached_hash']
def __eq__(self, other):
+ if isinstance(other, Typedef):
+ return other.__eq__(self)
return self.__class__ is other.__class__ and (
self is other or safe_equal(self.__dict__, other.__dict__))
@@ -194,6 +196,36 @@
raise NotImplementedError
+class Typedef(LowLevelType):
+ """A typedef is just another name for an existing type"""
+ def __init__(self, OF, c_name):
+ """
+ @param OF: the equivalent rffi type
+ @param c_name: the name we want in C code
+ """
+ assert isinstance(OF, LowLevelType)
+ # Look through typedefs, so other places don't have to
+ if isinstance(OF, Typedef):
+ OF = OF.OF # haha
+ self.OF = OF
+ self.c_name = c_name
+
+ def __repr__(self):
+ return '<Typedef "%s" of %r>' % (self.c_name, self.OF)
+
+ def __eq__(self, other):
+ return other == self.OF
+
+ def __getattr__(self, name):
+ return self.OF.get(name)
+
+ def _defl(self, parent=None, parentindex=None):
+ return self.OF._defl()
+
+ def _allocate(self, initialization, parent=None, parentindex=None):
+ return self.OF._allocate(initialization, parent, parentindex)
+
+
class Struct(ContainerType):
_gckind = 'raw'
diff --git a/pypy/module/cpyext/api.py b/pypy/module/cpyext/api.py
--- a/pypy/module/cpyext/api.py
+++ b/pypy/module/cpyext/api.py
@@ -37,7 +37,7 @@
DEBUG_WRAPPER = True
# update these for other platforms
-Py_ssize_t = lltype.Signed
+Py_ssize_t = lltype.Typedef(rffi.SSIZE_T, 'Py_ssize_t')
Py_ssize_tP = rffi.CArrayPtr(Py_ssize_t)
size_t = rffi.ULONG
ADDR = lltype.Signed
@@ -192,14 +192,19 @@
- set `external` to False to get a C function pointer, but not exported by
the API headers.
"""
+ if isinstance(restype, lltype.Typedef):
+ real_restype = restype.OF
+ else:
+ real_restype = restype
+
if error is _NOT_SPECIFIED:
- if isinstance(restype, lltype.Ptr):
- error = lltype.nullptr(restype.TO)
- elif restype is lltype.Void:
+ if isinstance(real_restype, lltype.Ptr):
+ error = lltype.nullptr(real_restype.TO)
+ elif real_restype is lltype.Void:
error = CANNOT_FAIL
if type(error) is int:
- error = rffi.cast(restype, error)
- expect_integer = (isinstance(restype, lltype.Primitive) and
+ error = rffi.cast(real_restype, error)
+ expect_integer = (isinstance(real_restype, lltype.Primitive) and
rffi.cast(restype, 0) == 0)
def decorate(func):
diff --git a/pypy/rpython/lltypesystem/ll2ctypes.py b/pypy/rpython/lltypesystem/ll2ctypes.py
--- a/pypy/rpython/lltypesystem/ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/ll2ctypes.py
@@ -255,6 +255,9 @@
return cls
def build_new_ctypes_type(T, delayed_builders):
+ if isinstance(T, lltype.Typedef):
+ T = T.OF
+
if isinstance(T, lltype.Ptr):
if isinstance(T.TO, lltype.FuncType):
argtypes = [get_ctypes_type(ARG) for ARG in T.TO.ARGS
@@ -779,6 +782,8 @@
"""
if T is lltype.Void:
return None
+ if isinstance(T, lltype.Typedef):
+ T = T.OF
if isinstance(T, lltype.Ptr):
if not cobj or (
not isinstance(cobj, ctypes.c_uint32)
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -87,8 +87,9 @@
result = UnicodeBuilder(size)
pos = 0
while pos < size:
- ch = s[pos]
- ordch1 = ord(ch)
+ ordch1 = ord(s[pos])
+ # fast path for ASCII
+ # XXX maybe use a while loop here
if ordch1 < 0x80:
result.append(unichr(ordch1))
pos += 1
@@ -98,110 +99,149 @@
if pos + n > size:
if not final:
break
- else:
- endpos = pos + 1
- while endpos < size and ord(s[endpos]) & 0xC0 == 0x80:
- endpos += 1
- r, pos = errorhandler(errors, "utf-8",
- "unexpected end of data",
- s, pos, endpos)
+ charsleft = size - pos - 1 # either 0, 1, 2
+ # note: when we get the 'unexpected end of data' we don't care
+ # about the pos anymore and we just ignore the value
+ if not charsleft:
+ # there's only the start byte and nothing else
+ r, pos = errorhandler(errors, 'utf-8',
+ 'unexpected end of data',
+ s, pos, pos+1)
+ result.append(r)
+ break
+ ordch2 = ord(s[pos+1])
+ if n == 3:
+ # 3-bytes seq with only a continuation byte
+ if (ordch2>>6 != 0b10 or
+ (ordch1 == 0xe0 and ordch2 < 0xa0)):
+ # or (ordch1 == 0xed and ordch2 > 0x9f)
+ # second byte invalid, take the first and continue
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
+ continue
+ else:
+ # second byte valid, but third byte missing
+ r, pos = errorhandler(errors, 'utf-8',
+ 'unexpected end of data',
+ s, pos, pos+2)
+ result.append(r)
+ break
+ elif n == 4:
+ # 4-bytes seq with 1 or 2 continuation bytes
+ if (ordch2>>6 != 0b10 or
+ (ordch1 == 0xf0 and ordch2 < 0x90) or
+ (ordch1 == 0xf4 and ordch2 > 0x8f)):
+ # second byte invalid, take the first and continue
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
+ continue
+ elif charsleft == 2 and ord(s[pos+2])>>6 != 0b10:
+ # third byte invalid, take the first two and continue
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+2)
+ result.append(r)
+ continue
+ else:
+ # there's only 1 or 2 valid cb, but the others are missing
+ r, pos = errorhandler(errors, 'utf-8',
+ 'unexpected end of data',
+ s, pos, pos+charsleft+1)
+ result.append(r)
+ break
+
+ if n == 0:
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid start byte',
+ s, pos, pos+1)
+ result.append(r)
+
+ elif n == 1:
+ assert 0, "ascii should have gone through the fast path"
+
+ elif n == 2:
+ ordch2 = ord(s[pos+1])
+ if ordch2>>6 != 0b10:
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
result.append(r)
continue
+ # 110yyyyy 10zzzzzz -> 00000000 00000yyy yyzzzzzz
+ result.append(unichr(((ordch1 & 0b00011111) << 6) +
+ (ordch2 & 0b00111111)))
+ pos += 2
- if n == 0:
- r, pos = errorhandler(errors, "utf-8",
- "invalid start byte",
- s, pos, pos + 1)
- result.append(r)
- elif n == 1:
- assert 0, "you can never get here"
- elif n == 2:
- # 110yyyyy 10zzzzzz ====> 00000000 00000yyy yyzzzzzz
-
- ordch2 = ord(s[pos+1])
- z, two = splitter[6, 2](ordch2)
- y, six = splitter[5, 3](ordch1)
- assert six == 6
- if two != 2:
- r, pos = errorhandler(errors, "utf-8",
- "invalid continuation byte",
- s, pos, pos + 1)
- result.append(r)
- else:
- c = (y << 6) + z
- result.append(unichr(c))
- pos += n
elif n == 3:
- # 1110xxxx 10yyyyyy 10zzzzzz ====> 00000000 xxxxyyyy yyzzzzzz
ordch2 = ord(s[pos+1])
ordch3 = ord(s[pos+2])
- z, two1 = splitter[6, 2](ordch3)
- y, two2 = splitter[6, 2](ordch2)
- x, fourteen = splitter[4, 4](ordch1)
- assert fourteen == 14
- if (two1 != 2 or two2 != 2 or
+ if (ordch2>>6 != 0b10 or
(ordch1 == 0xe0 and ordch2 < 0xa0)
# surrogates shouldn't be valid UTF-8!
# Uncomment the line below to make them invalid.
# or (ordch1 == 0xed and ordch2 > 0x9f)
):
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
+ result.append(r)
+ continue
+ elif ordch3>>6 != 0b10:
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+2)
+ result.append(r)
+ continue
+ # 1110xxxx 10yyyyyy 10zzzzzz -> 00000000 xxxxyyyy yyzzzzzz
+ result.append(unichr(((ordch1 & 0b00001111) << 12) +
+ ((ordch2 & 0b00111111) << 6) +
+ (ordch3 & 0b00111111)))
+ pos += 3
- # if ordch2 first two bits are 1 and 0, then the invalid
- # continuation byte is ordch3; else ordch2 is invalid.
- if two2 == 2:
- endpos = pos + 2
- else:
- endpos = pos + 1
- r, pos = errorhandler(errors, "utf-8",
- "invalid continuation byte",
- s, pos, endpos)
- result.append(r)
- else:
- c = (x << 12) + (y << 6) + z
- result.append(unichr(c))
- pos += n
elif n == 4:
- # 11110www 10xxxxxx 10yyyyyy 10zzzzzz ====>
- # 000wwwxx xxxxyyyy yyzzzzzz
ordch2 = ord(s[pos+1])
ordch3 = ord(s[pos+2])
ordch4 = ord(s[pos+3])
- z, two1 = splitter[6, 2](ordch4)
- y, two2 = splitter[6, 2](ordch3)
- x, two3 = splitter[6, 2](ordch2)
- w, thirty = splitter[3, 5](ordch1)
- assert thirty == 30
- if (two1 != 2 or two2 != 2 or two3 != 2 or
+ if (ordch2>>6 != 0b10 or
(ordch1 == 0xf0 and ordch2 < 0x90) or
(ordch1 == 0xf4 and ordch2 > 0x8f)):
- endpos = pos + 1
- if ordch2 & 0xc0 == 0x80:
- endpos += 1
- if ordch3 & 0xc0 == 0x80:
- endpos += 1
- r, pos = errorhandler(errors, "utf-8",
- "invalid continuation byte",
- s, pos, endpos)
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+1)
result.append(r)
+ continue
+ elif ordch3>>6 != 0b10:
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+2)
+ result.append(r)
+ continue
+ elif ordch4>>6 != 0b10:
+ r, pos = errorhandler(errors, 'utf-8',
+ 'invalid continuation byte',
+ s, pos, pos+3)
+ result.append(r)
+ continue
+ # 11110www 10xxxxxx 10yyyyyy 10zzzzzz -> 000wwwxx xxxxyyyy yyzzzzzz
+ c = (((ordch1 & 0b00000111) << 18) +
+ ((ordch2 & 0b00111111) << 12) +
+ ((ordch3 & 0b00111111) << 6) +
+ (ordch4 & 0b00111111))
+ if c <= MAXUNICODE:
+ result.append(UNICHR(c))
else:
- c = (w << 18) + (x << 12) + (y << 6) + z
- # convert to UTF-16 if necessary
- if c <= MAXUNICODE:
- result.append(UNICHR(c))
- else:
- # compute and append the two surrogates:
- # translate from 10000..10FFFF to 0..FFFF
- c -= 0x10000
- # high surrogate = top 10 bits added to D800
- result.append(unichr(0xD800 + (c >> 10)))
- # low surrogate = bottom 10 bits added to DC00
- result.append(unichr(0xDC00 + (c & 0x03FF)))
- pos += n
- else:
- r, pos = errorhandler(errors, "utf-8",
- "unsupported Unicode code range",
- s, pos, pos + n)
- result.append(r)
+ # compute and append the two surrogates:
+ # translate from 10000..10FFFF to 0..FFFF
+ c -= 0x10000
+ # high surrogate = top 10 bits added to D800
+ result.append(unichr(0xD800 + (c >> 10)))
+ # low surrogate = bottom 10 bits added to DC00
+ result.append(unichr(0xDC00 + (c & 0x03FF)))
+ pos += 4
return result.build(), pos
@@ -629,7 +669,7 @@
3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
# sp ! " # $ % & ' ( ) * + , - . /
2, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 3, 0, 0, 0, 0,
-# 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
+# 0 1 2 3 4 5 6 7 8 9 : ; < = > ?
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0,
# @ A B C D E F G H I J K L M N O
1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
@@ -905,20 +945,20 @@
pos = 0
while pos < size:
ch = p[pos]
-
+
if ord(ch) < limit:
result.append(chr(ord(ch)))
pos += 1
else:
# startpos for collecting unencodable chars
- collstart = pos
- collend = pos+1
+ collstart = pos
+ collend = pos+1
while collend < len(p) and ord(p[collend]) >= limit:
collend += 1
r, pos = errorhandler(errors, encoding, reason, p,
collstart, collend)
result.append(r)
-
+
return result.build()
def unicode_encode_latin_1(p, size, errors, errorhandler=None):
diff --git a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py b/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
--- a/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_pypy_c_new.py
@@ -277,6 +277,7 @@
""")
def test_default_and_kw(self):
+ py.test.skip("Wait until we have saner defaults strat")
def main(n):
def f(i, j=1):
return i + j
@@ -487,7 +488,6 @@
""")
def test_range_iter(self):
- py.test.skip("until we fix defaults")
def main(n):
def g(n):
return range(n)
@@ -838,7 +838,7 @@
src = """
def main():
sa = 0
- for i in range(1000):
+ for i in range(300):
if i %s %d:
sa += 1
else:
@@ -849,7 +849,7 @@
sa += 20000
return sa
""" % (op1, a, op2, b)
- self.run_and_check(src, threshold=400)
+ self.run_and_check(src, threshold=200)
src = """
def main():
@@ -867,7 +867,7 @@
i += 0.25
return sa
""" % (op1, float(a)/4.0, op2, float(b)/4.0)
- self.run_and_check(src, threshold=400)
+ self.run_and_check(src, threshold=300)
def test_boolrewrite_allcases_reflex(self):
@@ -888,7 +888,7 @@
src = """
def main():
sa = 0
- for i in range(1000):
+ for i in range(300):
if i %s %d:
sa += 1
else:
@@ -899,7 +899,7 @@
sa += 20000
return sa
""" % (op1, a, b, op2)
- self.run_and_check(src, threshold=400)
+ self.run_and_check(src, threshold=200)
src = """
def main():
@@ -917,11 +917,13 @@
i += 0.25
return sa
""" % (op1, float(a)/4.0, float(b)/4.0, op2)
- self.run_and_check(src, threshold=400)
+ self.run_and_check(src, threshold=300)
def test_boolrewrite_ptr(self):
- # XXX this test is way too imprecise in what it is actually testing
- # it should count the number of guards instead
+ """
+ This test only checks that we get the expected result, not that any
+ optimization has been applied.
+ """
compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
for e1 in compares:
for e2 in compares:
@@ -933,7 +935,7 @@
b = tst()
c = tst()
sa = 0
- for i in range(1000):
+ for i in range(300):
if %s:
sa += 1
else:
@@ -946,7 +948,7 @@
a = b
return sa
""" % (e1, e2)
- self.run_and_check(src, threshold=400)
+ self.run_and_check(src, threshold=200)
def test_array_sum(self):
def main():
@@ -1010,7 +1012,7 @@
""")
def test_func_defaults(self):
- py.test.skip("skipped until we fix defaults")
+ py.test.skip("until we fix defaults")
def main(n):
i = 1
while i < n:
@@ -1063,7 +1065,7 @@
i23 = int_lt(0, i21)
guard_true(i23, descr=<Guard5>)
i24 = getfield_gc(p17, descr=<NonGcPtrFieldDescr .*W_ArrayTypei.inst_buffer .*>)
- i25 = getarrayitem_raw(i24, 0, descr=<SignedArrayNoLengthDescr>)
+ i25 = getarrayitem_raw(i24, 0, descr=<.*>)
i27 = int_lt(1, i21)
guard_false(i27, descr=<Guard6>)
i28 = int_add_ovf(i10, i25)
@@ -1071,3 +1073,460 @@
--TICK--
jump(p0, p1, p2, p3, p4, p5, p6, p7, p8, p9, i28, i25, i19, i13, p14, p15, descr=<Loop0>)
""")
+
+ def test_intbound_simple(self):
+ """
+ This test only checks that we get the expected result, not that any
+ optimization has been applied.
+ """
+ ops = ('<', '>', '<=', '>=', '==', '!=')
+ nbr = (3, 7)
+ for o1 in ops:
+ for o2 in ops:
+ for n1 in nbr:
+ for n2 in nbr:
+ src = '''
+ def f(i):
+ a, b = 3, 3
+ if i %s %d:
+ a = 0
+ else:
+ a = 1
+ if i %s %d:
+ b = 0
+ else:
+ b = 1
+ return a + b * 2
+
+ def main():
+ res = [0] * 4
+ idx = []
+ for i in range(15):
+ idx.extend([i] * 15)
+ for i in idx:
+ res[f(i)] += 1
+ return res
+
+ ''' % (o1, n1, o2, n2)
+ self.run_and_check(src, threshold=200)
+
+ def test_intbound_addsub_mix(self):
+ """
+ This test only checks that we get the expected result, not that any
+ optimization has been applied.
+ """
+ tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
+ 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
+ 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
+ for t1 in tests:
+ for t2 in tests:
+ src = '''
+ def f(i):
+ a, b = 3, 3
+ if %s:
+ a = 0
+ else:
+ a = 1
+ if %s:
+ b = 0
+ else:
+ b = 1
+ return a + b * 2
+
+ def main():
+ res = [0] * 4
+ idx = []
+ for i in range(15):
+ idx.extend([i] * 15)
+ for i in idx:
+ res[f(i)] += 1
+ return res
+
+ ''' % (t1, t2)
+ self.run_and_check(src, threshold=200)
+
+ def test_intbound_gt(self):
+ def main(n):
+ i, a, b = 0, 0, 0
+ while i < n:
+ if i > -1:
+ a += 1
+ if i > -2:
+ b += 1
+ i += 1
+ return (a, b)
+ #
+ log = self.run(main, [300], threshold=200)
+ assert log.result == (300, 300)
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i10 = int_lt(i8, i9)
+ guard_true(i10, descr=...)
+ i12 = int_add_ovf(i7, 1)
+ guard_no_overflow(descr=...)
+ i14 = int_add_ovf(i6, 1)
+ guard_no_overflow(descr=...)
+ i17 = int_add(i8, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, p4, p5, i14, i12, i17, i9, descr=<Loop0>)
+ """)
+
+ def test_intbound_sub_lt(self):
+ def main():
+ i, a = 0, 0
+ while i < 300:
+ if i - 10 < 295:
+ a += 1
+ i += 1
+ return a
+ #
+ log = self.run(main, [], threshold=200)
+ assert log.result == 300
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i7 = int_lt(i5, 300)
+ guard_true(i7, descr=...)
+ i9 = int_sub_ovf(i5, 10)
+ guard_no_overflow(descr=...)
+ i11 = int_add_ovf(i4, 1)
+ guard_no_overflow(descr=...)
+ i13 = int_add(i5, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, i11, i13, descr=<Loop0>)
+ """)
+
+ def test_intbound_addsub_ge(self):
+ def main(n):
+ i, a, b = 0, 0, 0
+ while i < n:
+ if i + 5 >= 5:
+ a += 1
+ if i - 1 >= -1:
+ b += 1
+ i += 1
+ return (a, b)
+ #
+ log = self.run(main, [300], threshold=200)
+ assert log.result == (300, 300)
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i10 = int_lt(i8, i9)
+ guard_true(i10, descr=...)
+ i12 = int_add_ovf(i8, 5)
+ guard_no_overflow(descr=...)
+ i14 = int_add_ovf(i7, 1)
+ guard_no_overflow(descr=...)
+ i16 = int_add_ovf(i6, 1)
+ guard_no_overflow(descr=...)
+ i19 = int_add(i8, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, p4, p5, i16, i14, i19, i9, descr=<Loop0>)
+ """)
+
+ def test_intbound_addmul_ge(self):
+ def main(n):
+ i, a, b = 0, 0, 0
+ while i < 300:
+ if i + 5 >= 5:
+ a += 1
+ if 2 * i >= 0:
+ b += 1
+ i += 1
+ return (a, b)
+ #
+ log = self.run(main, [300], threshold=200)
+ assert log.result == (300, 300)
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i10 = int_lt(i8, 300)
+ guard_true(i10, descr=...)
+ i12 = int_add(i8, 5)
+ i14 = int_add_ovf(i7, 1)
+ guard_no_overflow(descr=...)
+ i16 = int_lshift(i8, 1)
+ i18 = int_add_ovf(i6, 1)
+ guard_no_overflow(descr=...)
+ i21 = int_add(i8, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, p4, p5, i18, i14, i21, descr=<Loop0>)
+ """)
+
+ def test_intbound_eq(self):
+ def main(a, n):
+ i, s = 0, 0
+ while i < 300:
+ if a == 7:
+ s += a + 1
+ elif i == 10:
+ s += i
+ else:
+ s += 1
+ i += 1
+ return s
+ #
+ log = self.run(main, [7, 300], threshold=200)
+ assert log.result == main(7, 300)
+ log = self.run(main, [10, 300], threshold=200)
+ assert log.result == main(10, 300)
+ log = self.run(main, [42, 300], threshold=200)
+ assert log.result == main(42, 300)
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i10 = int_lt(i8, 300)
+ guard_true(i10, descr=...)
+ i12 = int_eq(i8, 10)
+ guard_false(i12, descr=...)
+ i14 = int_add_ovf(i7, 1)
+ guard_no_overflow(descr=...)
+ i16 = int_add(i8, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, p4, p5, p6, i14, i16, descr=<Loop0>)
+ """)
+
+ def test_intbound_mul(self):
+ def main(a):
+ i, s = 0, 0
+ while i < 300:
+ assert i >= 0
+ if 2 * i < 30000:
+ s += 1
+ else:
+ s += a
+ i += 1
+ return s
+ #
+ log = self.run(main, [7], threshold=200)
+ assert log.result == 300
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i8 = int_lt(i6, 300)
+ guard_true(i8, descr=...)
+ i10 = int_lshift(i6, 1)
+ i12 = int_add_ovf(i5, 1)
+ guard_no_overflow(descr=...)
+ i14 = int_add(i6, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, p4, i12, i14, descr=<Loop0>)
+ """)
+
+ def test_assert(self):
+ def main(a):
+ i, s = 0, 0
+ while i < 300:
+ assert a == 7
+ s += a + 1
+ i += 1
+ return s
+ log = self.run(main, [7], threshold=200)
+ assert log.result == 300*8
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i8 = int_lt(i6, 300)
+ guard_true(i8, descr=...)
+ i10 = int_add_ovf(i5, 8)
+ guard_no_overflow(descr=...)
+ i12 = int_add(i6, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, p4, i10, i12, descr=<Loop0>)
+ """)
+
+ def test_zeropadded(self):
+ def main():
+ from array import array
+ class ZeroPadded(array):
+ def __new__(cls, l):
+ self = array.__new__(cls, 'd', range(l))
+ return self
+
+ def __getitem__(self, i):
+ if i < 0 or i >= len(self):
+ return 0
+ return array.__getitem__(self, i) # ID: get
+ #
+ buf = ZeroPadded(2000)
+ i = 10
+ sa = 0
+ while i < 2000 - 10:
+ sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+ i += 1
+ return sa
+
+ log = self.run(main, [], threshold=200)
+ assert log.result == 9895050.0
+ loop, = log.loops_by_filename(self.filepath)
+ #
+ # check that the overloaded __getitem__ does not introduce double
+ # array bound checks.
+ #
+ # The force_token()s are still there, but will be eliminated by the
+ # backend regalloc, so they are harmless
+ assert loop.match(ignore_ops=['force_token'],
+ expected_src="""
+ ...
+ i20 = int_ge(i18, i8)
+ guard_false(i20, descr=...)
+ f21 = getarrayitem_raw(i13, i18, descr=...)
+ f23 = getarrayitem_raw(i13, i14, descr=...)
+ f24 = float_add(f21, f23)
+ f26 = getarrayitem_raw(i13, i6, descr=...)
+ f27 = float_add(f24, f26)
+ i29 = int_add(i6, 1)
+ i31 = int_ge(i29, i8)
+ guard_false(i31, descr=...)
+ f33 = getarrayitem_raw(i13, i29, descr=...)
+ f34 = float_add(f27, f33)
+ i36 = int_add(i6, 2)
+ i38 = int_ge(i36, i8)
+ guard_false(i38, descr=...)
+ f39 = getarrayitem_raw(i13, i36, descr=...)
+ ...
+ """)
+
+
+ def test_circular(self):
+ def main():
+ from array import array
+ class Circular(array):
+ def __new__(cls):
+ self = array.__new__(cls, 'd', range(256))
+ return self
+ def __getitem__(self, i):
+ assert len(self) == 256
+ return array.__getitem__(self, i & 255)
+ #
+ buf = Circular()
+ i = 10
+ sa = 0
+ while i < 2000 - 10:
+ sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
+ i += 1
+ return sa
+ #
+ log = self.run(main, [], threshold=200)
+ assert log.result == 1239690.0
+ loop, = log.loops_by_filename(self.filepath)
+ #
+ # check that the array bound checks are removed
+ #
+ # The force_token()s are still there, but will be eliminated by the
+ # backend regalloc, so they are harmless
+ assert loop.match(ignore_ops=['force_token'],
+ expected_src="""
+ ...
+ i17 = int_and(i14, 255)
+ f18 = getarrayitem_raw(i8, i17, descr=...)
+ f20 = getarrayitem_raw(i8, i9, descr=...)
+ f21 = float_add(f18, f20)
+ f23 = getarrayitem_raw(i8, i10, descr=...)
+ f24 = float_add(f21, f23)
+ i26 = int_add(i6, 1)
+ i29 = int_and(i26, 255)
+ f30 = getarrayitem_raw(i8, i29, descr=...)
+ f31 = float_add(f24, f30)
+ i33 = int_add(i6, 2)
+ i36 = int_and(i33, 255)
+ f37 = getarrayitem_raw(i8, i36, descr=...)
+ ...
+ """)
+
+ def test_min_max(self):
+ def main():
+ i=0
+ sa=0
+ while i < 300:
+ sa+=min(max(i, 3000), 4000)
+ i+=1
+ return sa
+ log = self.run(main, [], threshold=200)
+ assert log.result == 300*3000
+ loop, = log.loops_by_filename(self.filepath)
+ assert loop.match("""
+ i7 = int_lt(i4, 300)
+ guard_true(i7, descr=...)
+ i9 = int_add_ovf(i5, 3000)
+ guard_no_overflow(descr=...)
+ i11 = int_add(i4, 1)
+ --TICK--
+ jump(p0, p1, p2, p3, i11, i9, descr=<Loop0>)
+ """)
+
+ def test_silly_max(self):
+ def main():
+ i = 2
+ sa = 0
+ while i < 300:
+ lst = range(i)
+ sa += max(*lst) # ID: max
+ i += 1
+ return sa
+ log = self.run(main, [], threshold=200)
+ assert log.result == main()
+ loop, = log.loops_by_filename(self.filepath)
+ # We don't want too many guards, but a residual call to min_max_loop
+ guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+ assert len(guards) < 20
+ assert loop.match_by_id('max',"""
+ ...
+ p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+ ...
+ """)
+
+ def test_iter_max(self):
+ def main():
+ i = 2
+ sa = 0
+ while i < 300:
+ lst = range(i)
+ sa += max(lst) # ID: max
+ i += 1
+ return sa
+ log = self.run(main, [], threshold=200)
+ assert log.result == main()
+ loop, = log.loops_by_filename(self.filepath)
+ # We don't want too many guards, but a residual call to min_max_loop
+ guards = [n for n in log.opnames(loop.ops_by_id("max")) if n.startswith('guard')]
+ assert len(guards) < 20
+ assert loop.match_by_id('max',"""
+ ...
+ p76 = call_may_force(ConstClass(min_max_loop__max), _, _, descr=...)
+ ...
+ """)
+
+ def test__ffi_call(self):
+ from pypy.rlib.test.test_libffi import get_libm_name
+ def main(libm_name):
+ try:
+ from _ffi import CDLL, types
+ except ImportError:
+ sys.stderr.write('SKIP: cannot import _ffi\n')
+ return 0
+
+ libm = CDLL(libm_name)
+ pow = libm.getfunc('pow', [types.double, types.double],
+ types.double)
+ i = 0
+ res = 0
+ while i < 300:
+ res += pow(2, 3)
+ i += 1
+ return pow.getaddr(), res
+ #
+ libm_name = get_libm_name(sys.platform)
+ log = self.run(main, [libm_name], threshold=200)
+ pow_addr, res = log.result
+ assert res == 8.0 * 300
+ loop, = log.loops_by_filename(self.filepath)
+ # XXX: write the actual test when we merge this to jitypes2
+ ## ops = self.get_by_bytecode('CALL_FUNCTION')
+ ## assert len(ops) == 2 # we get two loops, because of specialization
+ ## call_function = ops[0]
+ ## last_ops = [op.getopname() for op in call_function[-5:]]
+ ## assert last_ops == ['force_token',
+ ## 'setfield_gc',
+ ## 'call_may_force',
+ ## 'guard_not_forced',
+ ## 'guard_no_exception']
+ ## call = call_function[-3]
+ ## assert call.getarg(0).value == pow_addr
+ ## assert call.getarg(1).value == 2.0
+ ## assert call.getarg(2).value == 3.0
diff --git a/pypy/module/pypyjit/test_pypy_c/test_model.py b/pypy/module/pypyjit/test_pypy_c/test_model.py
--- a/pypy/module/pypyjit/test_pypy_c/test_model.py
+++ b/pypy/module/pypyjit/test_pypy_c/test_model.py
@@ -52,6 +52,8 @@
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
stdout, stderr = pipe.communicate()
+ if stderr.startswith('SKIP:'):
+ py.test.skip(stderr)
assert not stderr
#
# parse the JIT log
@@ -100,11 +102,11 @@
class TestOpMatcher(object):
- def match(self, src1, src2):
+ def match(self, src1, src2, **kwds):
from pypy.tool.jitlogparser.parser import SimpleParser
loop = SimpleParser.parse_from_input(src1)
matcher = OpMatcher(loop.operations, src=src1)
- return matcher.match(src2)
+ return matcher.match(src2, **kwds)
def test_match_var(self):
match_var = OpMatcher([]).match_var
@@ -234,6 +236,21 @@
"""
assert self.match(loop, expected)
+ def test_ignore_opcodes(self):
+ loop = """
+ [i0]
+ i1 = int_add(i0, 1)
+ i4 = force_token()
+ i2 = int_sub(i1, 10)
+ jump(i4)
+ """
+ expected = """
+ i1 = int_add(i0, 1)
+ i2 = int_sub(i1, 10)
+ jump(i4, descr=...)
+ """
+ assert self.match(loop, expected, ignore_ops=['force_token'])
+
class TestRunPyPyC(BaseTestPyPyC):
@@ -253,6 +270,14 @@
log = self.run(src, [30, 12])
assert log.result == 42
+ def test_skip(self):
+ import pytest
+ def f():
+ import sys
+ print >> sys.stderr, 'SKIP: foobar'
+ #
+ raises(pytest.skip.Exception, "self.run(f, [])")
+
def test_parse_jitlog(self):
def f():
i = 0
diff --git a/pypy/translator/c/database.py b/pypy/translator/c/database.py
--- a/pypy/translator/c/database.py
+++ b/pypy/translator/c/database.py
@@ -1,7 +1,7 @@
-from pypy.rpython.lltypesystem.lltype import \
- Primitive, Ptr, typeOf, RuntimeTypeInfo, \
- Struct, Array, FuncType, PyObject, Void, \
- ContainerType, OpaqueType, FixedSizeArray, _uninitialized
+
+from pypy.rpython.lltypesystem.lltype import (
+ Primitive, Ptr, typeOf, RuntimeTypeInfo, Struct, Array, FuncType, PyObject,
+ Void, ContainerType, OpaqueType, FixedSizeArray, _uninitialized, Typedef)
from pypy.rpython.lltypesystem import lltype, rffi
from pypy.rpython.lltypesystem.llmemory import WeakRef, _WeakRefType, GCREF
from pypy.rpython.lltypesystem.llmemory import HiddenGcRef32
@@ -102,6 +102,8 @@
def gettype(self, T, varlength=1, who_asks=None, argnames=[]):
if isinstance(T, Primitive) or T == GCREF or T == HiddenGcRef32:
return PrimitiveType[T]
+ elif isinstance(T, Typedef):
+ return '%s @' % T.c_name
elif isinstance(T, Ptr):
if (isinstance(T.TO, OpaqueType) and
T.TO.hints.get('c_pointer_typedef') is not None):
diff --git a/pypy/rpython/lltypesystem/module/ll_math.py b/pypy/rpython/lltypesystem/module/ll_math.py
--- a/pypy/rpython/lltypesystem/module/ll_math.py
+++ b/pypy/rpython/lltypesystem/module/ll_math.py
@@ -56,6 +56,7 @@
math_fmod = llexternal('fmod', [rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE)
math_hypot = llexternal(underscore + 'hypot',
[rffi.DOUBLE, rffi.DOUBLE], rffi.DOUBLE)
+math_isinf = math_llexternal('isinf', [rffi.DOUBLE], rffi.INT)
# ____________________________________________________________
#
@@ -94,7 +95,8 @@
return y != y
def ll_math_isinf(y):
- return y != 0 and y * .5 == y
+ # Use a bitwise OR so the JIT doesn't produce 2 different guards.
+ return (y == INFINITY) | (y == -INFINITY)
ll_math_copysign = math_copysign
diff --git a/pypy/rpython/lltypesystem/test/test_ll2ctypes.py b/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
--- a/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
+++ b/pypy/rpython/lltypesystem/test/test_ll2ctypes.py
@@ -1000,6 +1000,13 @@
p = ctypes2lltype(lltype.Ptr(NODE), ctypes.pointer(pc))
assert p.pong.ping == p
+ def test_typedef(self):
+ assert ctypes2lltype(lltype.Typedef(lltype.Signed, 'test'), 6) == 6
+ assert ctypes2lltype(lltype.Typedef(lltype.Float, 'test2'), 3.4) == 3.4
+
+ assert get_ctypes_type(lltype.Signed) == get_ctypes_type(
+ lltype.Typedef(lltype.Signed, 'test3'))
+
def test_cast_adr_to_int(self):
class someaddr(object):
def _cast_to_int(self):
@@ -1014,7 +1021,7 @@
node = lltype.malloc(NODE)
ref = lltype.cast_opaque_ptr(llmemory.GCREF, node)
back = rffi.cast(llmemory.GCREF, rffi.cast(lltype.Signed, ref))
- assert lltype.cast_opaque_ptr(lltype.Ptr(NODE), ref) == node
+ assert lltype.cast_opaque_ptr(lltype.Ptr(NODE), back) == node
def test_gcref_forth_and_back(self):
cp = ctypes.c_void_p(1234)
diff --git a/pypy/rpython/lltypesystem/rffi.py b/pypy/rpython/lltypesystem/rffi.py
--- a/pypy/rpython/lltypesystem/rffi.py
+++ b/pypy/rpython/lltypesystem/rffi.py
@@ -818,6 +818,8 @@
"""Similar to llmemory.sizeof() but tries hard to return a integer
instead of a symbolic value.
"""
+ if isinstance(tp, lltype.Typedef):
+ tp = tp.OF
if isinstance(tp, lltype.FixedSizeArray):
return sizeof(tp.OF) * tp.length
if isinstance(tp, lltype.Struct):
diff --git a/pypy/interpreter/astcompiler/test/test_astbuilder.py b/pypy/interpreter/astcompiler/test/test_astbuilder.py
--- a/pypy/interpreter/astcompiler/test/test_astbuilder.py
+++ b/pypy/interpreter/astcompiler/test/test_astbuilder.py
@@ -754,6 +754,8 @@
("{x for x in z}", "set comprehension"),
("{x : x for x in z}", "dict comprehension"),
("'str'", "literal"),
+ ("u'str'", "literal"),
+ ("b'bytes'", "literal"),
("()", "()"),
("23", "literal"),
("{}", "literal"),
diff --git a/pypy/rlib/rbigint.py b/pypy/rlib/rbigint.py
--- a/pypy/rlib/rbigint.py
+++ b/pypy/rlib/rbigint.py
@@ -3,7 +3,8 @@
from pypy.rlib.rarithmetic import most_neg_value_of_same_type
from pypy.rlib.rfloat import isinf, isnan
from pypy.rlib.debug import make_sure_not_resized, check_regular_int
-from pypy.rlib.objectmodel import we_are_translated
+from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rlib import jit
from pypy.rpython.lltypesystem import lltype, rffi
from pypy.rpython import extregistry
@@ -122,7 +123,11 @@
def numdigits(self):
return len(self._digits)
+ @staticmethod
+ @jit.purefunction
def fromint(intval):
+ # This function is marked as pure, so you must not call it and
+ # then modify the result.
check_regular_int(intval)
if intval < 0:
sign = -1
@@ -149,20 +154,25 @@
t >>= SHIFT
p += 1
return v
- fromint = staticmethod(fromint)
+ @staticmethod
+ @jit.purefunction
def frombool(b):
+ # This function is marked as pure, so you must not call it and
+ # then modify the result.
if b:
return rbigint([ONEDIGIT], 1)
return rbigint()
- frombool = staticmethod(frombool)
+ @staticmethod
def fromlong(l):
+ "NOT_RPYTHON"
return rbigint(*args_from_long(l))
- fromlong = staticmethod(fromlong)
+ @staticmethod
def fromfloat(dval):
""" Create a new bigint object from a float """
+ # This function is not marked as pure because it can raise
sign = 1
if isinf(dval) or isnan(dval):
raise OverflowError
@@ -183,16 +193,21 @@
frac -= float(bits)
frac = math.ldexp(frac, SHIFT)
return v
- fromfloat = staticmethod(fromfloat)
+ @staticmethod
+ @jit.purefunction
+ @specialize.argtype(0)
def fromrarith_int(i):
+ # This function is marked as pure, so you must not call it and
+ # then modify the result.
return rbigint(*args_from_rarith_int(i))
- fromrarith_int._annspecialcase_ = "specialize:argtype(0)"
- fromrarith_int = staticmethod(fromrarith_int)
+ @staticmethod
+ @jit.purefunction
def fromdecimalstr(s):
+ # This function is marked as pure, so you must not call it and
+ # then modify the result.
return _decimalstr_to_bigint(s)
- fromdecimalstr = staticmethod(fromdecimalstr)
def toint(self):
"""
@@ -1841,7 +1856,7 @@
elif s[p] == '+':
p += 1
- a = rbigint.fromint(0)
+ a = rbigint()
tens = 1
dig = 0
ord0 = ord('0')
@@ -1859,7 +1874,7 @@
def parse_digit_string(parser):
# helper for objspace.std.strutil
- a = rbigint.fromint(0)
+ a = rbigint()
base = parser.base
digitmax = BASE_MAX[base]
tens, dig = 1, 0
diff --git a/pypy/annotation/model.py b/pypy/annotation/model.py
--- a/pypy/annotation/model.py
+++ b/pypy/annotation/model.py
@@ -641,6 +641,8 @@
except TypeError:
s = None # unhashable T, e.g. a Ptr(GcForwardReference())
if s is None:
+ if isinstance(T, lltype.Typedef):
+ return lltype_to_annotation(T.OF)
if isinstance(T, lltype.Number):
return SomeInteger(knowntype=T._type)
if isinstance(T, (ootype.Instance, ootype.BuiltinType)):
diff --git a/.hgignore b/.hgignore
--- a/.hgignore
+++ b/.hgignore
@@ -5,6 +5,8 @@
syntax: regexp
^testresult$
^site-packages$
+^site-packages/.*$
+^site-packages/.*$
^bin$
^pypy/bin/pypy-c
^pypy/module/cpyext/src/.+\.o$
@@ -37,8 +39,6 @@
^pypy/translator/goal/.+\.dll$
^pypy/translator/goal/target.+-c$
^pypy/_cache$
-^site-packages/.+\.egg$
-^site-packages/.+\.pth$
^pypy/doc/statistic/.+\.html$
^pypy/doc/statistic/.+\.eps$
^pypy/doc/statistic/.+\.pdf$
diff --git a/pypy/rpython/lltypesystem/test/test_lltype.py b/pypy/rpython/lltypesystem/test/test_lltype.py
--- a/pypy/rpython/lltypesystem/test/test_lltype.py
+++ b/pypy/rpython/lltypesystem/test/test_lltype.py
@@ -804,6 +804,21 @@
hints={'immutable_fields': FieldListAccessor({'x':'[*]'})})
assert S._immutable_field('x') == '[*]'
+def test_typedef():
+ T = Typedef(Signed, 'T')
+ assert T == Signed
+ assert Signed == T
+ T2 = Typedef(T, 'T2')
+ assert T2 == T
+ assert T2.OF is Signed
+ py.test.raises(TypeError, Ptr, T)
+ assert rffi.CArrayPtr(T) == rffi.CArrayPtr(Signed)
+ assert rffi.CArrayPtr(Signed) == rffi.CArrayPtr(T)
+
+ F = FuncType((T,), T)
+ assert F.RESULT == Signed
+ assert F.ARGS == (Signed,)
+
class TestTrackAllocation:
def test_automatic_tracking(self):
diff --git a/pypy/module/pypyjit/test_pypy_c/model.py b/pypy/module/pypyjit/test_pypy_c/model.py
--- a/pypy/module/pypyjit/test_pypy_c/model.py
+++ b/pypy/module/pypyjit/test_pypy_c/model.py
@@ -153,10 +153,10 @@
for op in self._ops_for_chunk(chunk, include_debug_merge_points):
yield op
- def match(self, expected_src):
+ def match(self, expected_src, **kwds):
ops = list(self.allops())
matcher = OpMatcher(ops, src=self.format_ops())
- return matcher.match(expected_src)
+ return matcher.match(expected_src, **kwds)
def match_by_id(self, id, expected_src, **kwds):
ops = list(self.ops_by_id(id, **kwds))
@@ -314,7 +314,7 @@
# it matched! The '...' operator ends here
return op
- def match_loop(self, expected_ops):
+ def match_loop(self, expected_ops, ignore_ops):
"""
A note about partial matching: the '...' operator is non-greedy,
i.e. it matches all the operations until it finds one that matches
@@ -333,13 +333,16 @@
return
op = self.match_until(exp_op, iter_ops)
else:
- op = self._next_op(iter_ops)
+ while True:
+ op = self._next_op(iter_ops)
+ if op.name not in ignore_ops:
+ break
self.match_op(op, exp_op)
#
# make sure we exhausted iter_ops
self._next_op(iter_ops, assert_raises=True)
- def match(self, expected_src):
+ def match(self, expected_src, ignore_ops=[]):
def format(src):
if src is None:
return ''
@@ -348,7 +351,7 @@
expected_src = self.preprocess_expected_src(expected_src)
expected_ops = self.parse_ops(expected_src)
try:
- self.match_loop(expected_ops)
+ self.match_loop(expected_ops, ignore_ops)
except InvalidMatch, e:
#raise # uncomment this and use py.test --pdb for better debugging
print '@' * 40
@@ -357,6 +360,7 @@
print e.args
print e.msg
print
+ print "Ignore ops:", ignore_ops
print "Got:"
print format(self.src)
print
diff --git a/pypy/translator/c/test/test_database.py b/pypy/translator/c/test/test_database.py
--- a/pypy/translator/c/test/test_database.py
+++ b/pypy/translator/c/test/test_database.py
@@ -5,7 +5,7 @@
from pypy.objspace.flow.model import Constant, Variable, SpaceOperation
from pypy.objspace.flow.model import Block, Link, FunctionGraph
from pypy.rpython.typesystem import getfunctionptr
-from pypy.rpython.lltypesystem.rffi import VOIDP, INT_real, INT
+from pypy.rpython.lltypesystem.rffi import VOIDP, INT_real, INT, CArrayPtr
def dump_on_stdout(database):
@@ -244,3 +244,15 @@
db.get(p)
db.complete()
dump_on_stdout(db)
+
+def test_typedef():
+ A = Typedef(Signed, 'test4')
+ db = LowLevelDatabase()
+ assert db.gettype(A) == "test4 @"
+
+ PA = CArrayPtr(A)
+ assert db.gettype(PA) == "test4 *@"
+
+ F = FuncType((A,), A)
+ assert db.gettype(F) == "test4 (@)(test4)"
+
diff --git a/pypy/module/pypyjit/test/test_pypy_c.py b/pypy/module/pypyjit/test/test_pypy_c.py
--- a/pypy/module/pypyjit/test/test_pypy_c.py
+++ b/pypy/module/pypyjit/test/test_pypy_c.py
@@ -198,44 +198,6 @@
print
print '@' * 79
- def test_f1(self):
- self.run_source('''
- def main(n):
- "Arbitrary test function."
- i = 0
- x = 1
- while i<n:
- j = 0 #ZERO
- while j<=i:
- j = j + 1
- x = x + (i&j)
- i = i + 1
- return x
- ''', 220,
- ([2117], 1083876708))
-
- def test_factorial(self):
- self.run_source('''
- def main(n):
- r = 1
- while n > 1:
- r *= n
- n -= 1
- return r
- ''', 28,
- ([5], 120),
- ([25], 15511210043330985984000000L))
-
- def test_factorialrec(self):
- self.run_source('''
- def main(n):
- if n > 1:
- return n * main(n-1)
- else:
- return 1
- ''', 0,
- ([5], 120),
- ([25], 15511210043330985984000000L))
def test_richards(self):
self.run_source('''
@@ -247,529 +209,6 @@
''' % (sys.path,), 7200,
([], 42))
- def test_simple_call(self):
- self.run_source('''
- OFFSET = 0
- def f(i):
- return i + 1 + OFFSET
- def main(n):
- i = 0
- while i < n+OFFSET:
- i = f(f(i))
- return i
- ''', 98,
- ([20], 20),
- ([31], 32))
- ops = self.get_by_bytecode("LOAD_GLOBAL", True)
- assert len(ops) == 5
- assert ops[0].get_opnames() == ["guard_value",
- "getfield_gc", "guard_value",
- "getfield_gc", "guard_isnull",
- "getfield_gc", "guard_nonnull_class"]
- # the second getfield on the same globals is quicker
- assert ops[1].get_opnames() == ["getfield_gc", "guard_nonnull_class"]
- assert not ops[2] # second LOAD_GLOBAL of the same name folded away
- # LOAD_GLOBAL of the same name but in different function partially
- # folded away
- # XXX could be improved
- assert ops[3].get_opnames() == ["guard_value",
- "getfield_gc", "guard_isnull"]
- assert not ops[4]
- ops = self.get_by_bytecode("CALL_FUNCTION", True)
- assert len(ops) == 2
- for i, bytecode in enumerate(ops):
- if i == 0:
- assert "call(getexecutioncontext)" in str(bytecode)
- else:
- assert not bytecode.get_opnames("call")
- assert not bytecode.get_opnames("new")
- assert len(bytecode.get_opnames("guard")) <= 10
-
- ops = self.get_by_bytecode("LOAD_GLOBAL")
- assert len(ops) == 5
- for bytecode in ops:
- assert not bytecode
-
- ops = self.get_by_bytecode("CALL_FUNCTION")
- assert len(ops) == 2
- for bytecode in ops:
- assert len(bytecode) <= 1
-
-
- def test_method_call(self):
- self.run_source('''
- class A(object):
- def __init__(self, a):
- self.a = a
- def f(self, i):
- return self.a + i
- def main(n):
- i = 0
- a = A(1)
- while i < n:
- x = a.f(i)
- i = a.f(x)
- return i
- ''', 93,
- ([20], 20),
- ([31], 32))
- ops = self.get_by_bytecode("LOOKUP_METHOD", True)
- assert len(ops) == 2
- assert not ops[0].get_opnames("call")
- assert not ops[0].get_opnames("new")
- assert len(ops[0].get_opnames("guard")) <= 3
- assert not ops[1] # second LOOKUP_METHOD folded away
-
- ops = self.get_by_bytecode("LOOKUP_METHOD")
- assert not ops[0] # first LOOKUP_METHOD folded away
- assert not ops[1] # second LOOKUP_METHOD folded away
-
- ops = self.get_by_bytecode("CALL_METHOD", True)
- assert len(ops) == 2
- for i, bytecode in enumerate(ops):
- if i == 0:
- assert "call(getexecutioncontext)" in str(bytecode)
- else:
- assert not bytecode.get_opnames("call")
- assert not bytecode.get_opnames("new")
- assert len(bytecode.get_opnames("guard")) <= 6
- assert len(ops[1]) < len(ops[0])
-
- ops = self.get_by_bytecode("CALL_METHOD")
- assert len(ops) == 2
- assert len(ops[0]) <= 1
- assert len(ops[1]) <= 1
-
- ops = self.get_by_bytecode("LOAD_ATTR", True)
- assert len(ops) == 2
- # With mapdict, we get fast access to (so far) the 5 first
- # attributes, which means it is done with only the following
- # operations. (For the other attributes there is additionally
- # a getarrayitem_gc.)
- assert ops[0].get_opnames() == ["getfield_gc",
- "guard_nonnull_class"]
- assert not ops[1] # second LOAD_ATTR folded away
-
- ops = self.get_by_bytecode("LOAD_ATTR")
- assert not ops[0] # first LOAD_ATTR folded away
- assert not ops[1] # second LOAD_ATTR folded away
-
- def test_static_classmethod_call(self):
- self.run_source('''
- class A(object):
- @classmethod
- def f(cls, i):
- return i + (cls is A) + 1
-
- @staticmethod
- def g(i):
- return i - 1
-
- def main(n):
- i = 0
- a = A()
- while i < n:
- x = a.f(i)
- i = a.g(x)
- return i
- ''', 106,
- ([20], 20),
- ([31], 31))
- ops = self.get_by_bytecode("LOOKUP_METHOD")
- assert len(ops) == 2
- assert not ops[0].get_opnames("call")
- assert not ops[0].get_opnames("new")
- assert len(ops[0].get_opnames("guard")) <= 2
- assert len(ops[0].get_opnames("getfield")) <= 4
- assert not ops[1] # second LOOKUP_METHOD folded away
-
- def test_default_and_kw(self):
- self.run_source('''
- def f(i, j=1):
- return i + j
- def main(n):
- i = 0
- while i < n:
- i = f(f(i), j=1)
- return i
- ''', 100,
- ([20], 20),
- ([31], 32))
- ops = self.get_by_bytecode("CALL_FUNCTION")
- assert len(ops) == 2
- for i, bytecode in enumerate(ops):
- assert not bytecode.get_opnames("call")
- assert not bytecode.get_opnames("new")
- assert len(ops[0].get_opnames("guard")) <= 14
- assert len(ops[1].get_opnames("guard")) <= 3
-
- ops = self.get_by_bytecode("CALL_FUNCTION", True)
- assert len(ops) == 2
- for i, bytecode in enumerate(ops):
- if i == 0:
- assert "call(getexecutioncontext)" in str(bytecode)
- else:
- assert not bytecode.get_opnames("call")
- assert not bytecode.get_opnames("new")
- assert len(ops[0].get_opnames("guard")) <= 14
- assert len(ops[1].get_opnames("guard")) <= 3
-
- def test_kwargs(self):
- self.run_source('''
- d = {}
-
- def g(**args):
- return len(args)
-
- def main(x):
- s = 0
- d = {}
- for i in range(x):
- s += g(**d)
- d[str(i)] = i
- if i % 100 == 99:
- d = {}
- return s
- ''', 100000, ([100], 4950),
- ([1000], 49500),
- ([10000], 495000),
- ([100000], 4950000))
- assert len(self.rawloops) + len(self.rawentrybridges) == 4
- op, = self.get_by_bytecode("CALL_FUNCTION_KW")
- # XXX a bit too many guards, but better than before
- assert len(op.get_opnames("guard")) <= 12
-
- def test_stararg_virtual(self):
- self.run_source('''
- d = {}
-
- def g(*args):
- return len(args)
- def h(a, b, c):
- return c
-
- def main(x):
- s = 0
- for i in range(x):
- l = [i, x, 2]
- s += g(*l)
- s += h(*l)
- s += g(i, x, 2)
- for i in range(x):
- l = [x, 2]
- s += g(i, *l)
- s += h(i, *l)
- return s
- ''', 100000, ([100], 1300),
- ([1000], 13000),
- ([10000], 130000),
- ([100000], 1300000))
- assert len(self.loops) == 2
- ops = self.get_by_bytecode("CALL_FUNCTION_VAR")
- assert len(ops) == 4
- for op in ops:
- assert len(op.get_opnames("new")) == 0
- assert len(op.get_opnames("call_may_force")) == 0
-
- ops = self.get_by_bytecode("CALL_FUNCTION")
- for op in ops:
- assert len(op.get_opnames("new")) == 0
- assert len(op.get_opnames("call_may_force")) == 0
-
- def test_stararg(self):
- self.run_source('''
- d = {}
-
- def g(*args):
- return args[-1]
- def h(*args):
- return len(args)
-
- def main(x):
- s = 0
- l = []
- i = 0
- while i < x:
- l.append(1)
- s += g(*l)
- i = h(*l)
- return s
- ''', 100000, ([100], 100),
- ([1000], 1000),
- ([2000], 2000),
- ([4000], 4000))
- assert len(self.loops) == 1
- ops = self.get_by_bytecode("CALL_FUNCTION_VAR")
- for op in ops:
- assert len(op.get_opnames("new_with_vtable")) == 0
- assert len(op.get_opnames("call_may_force")) == 0
-
- def test_virtual_instance(self):
- self.run_source('''
- class A(object):
- pass
- def main(n):
- i = 0
- while i < n:
- a = A()
- assert isinstance(a, A)
- assert not isinstance(a, int)
- a.x = 2
- i = i + a.x
- return i
- ''', 69,
- ([20], 20),
- ([31], 32))
-
- callA, callisinstance1, callisinstance2 = (
- self.get_by_bytecode("CALL_FUNCTION"))
- assert not callA.get_opnames("call")
- assert not callA.get_opnames("new")
- assert len(callA.get_opnames("guard")) <= 2
- assert not callisinstance1.get_opnames("call")
- assert not callisinstance1.get_opnames("new")
- assert len(callisinstance1.get_opnames("guard")) <= 2
- # calling isinstance on a builtin type gives zero guards
- # because the version_tag of a builtin type is immutable
- assert not len(callisinstance1.get_opnames("guard"))
-
-
- bytecode, = self.get_by_bytecode("STORE_ATTR")
- assert bytecode.get_opnames() == []
-
- def test_load_attr(self):
- self.run_source('''
- class A(object):
- pass
- a = A()
- a.x = 2
- def main(n):
- i = 0
- while i < n:
- i = i + a.x
- return i
- ''', 41,
- ([20], 20),
- ([31], 32))
-
- load, = self.get_by_bytecode("LOAD_ATTR")
- # 1 guard_value for the class
- # 1 guard_value for the version_tag
- # 1 guard_value for the structure
- # 1 guard_nonnull_class for the result since it is used later
- assert len(load.get_opnames("guard")) <= 4
-
- def test_mixed_type_loop(self):
- self.run_source('''
- class A(object):
- pass
- def main(n):
- i = 0.0
- j = 2
- while i < n:
- i = j + i
- return i, type(i) is float
- ''', 35,
- ([20], (20, True)),
- ([31], (32, True)))
-
- bytecode, = self.get_by_bytecode("BINARY_ADD")
- assert not bytecode.get_opnames("call")
- assert not bytecode.get_opnames("new")
- assert len(bytecode.get_opnames("guard")) <= 2
-
- def test_call_builtin_function(self):
- self.run_source('''
- class A(object):
- pass
- def main(n):
- i = 2
- l = []
- while i < n:
- i += 1
- l.append(i)
- return i, len(l)
- ''', 39,
- ([20], (20, 18)),
- ([31], (31, 29)))
-
- bytecode, = self.get_by_bytecode("CALL_METHOD")
- assert len(bytecode.get_opnames("new_with_vtable")) == 1 # the forcing of the int
- assert len(bytecode.get_opnames("call")) == 1 # the call to append
- assert len(bytecode.get_opnames("guard")) == 1 # guard for guard_no_exception after the call
- bytecode, = self.get_by_bytecode("CALL_METHOD", True)
- assert len(bytecode.get_opnames("guard")) == 2 # guard for profiling disabledness + guard_no_exception after the call
-
- def test_range_iter(self):
- self.run_source('''
- def g(n):
- return range(n)
-
- def main(n):
- s = 0
- for i in range(n):
- s += g(n)[i]
- return s
- ''', 143, ([1000], 1000 * 999 / 2))
- bytecode, = self.get_by_bytecode("BINARY_SUBSCR", True)
- assert bytecode.get_opnames("guard") == [
- "guard_false", # check that the index is >= 0
- "guard_false", # check that the index is lower than the current length
- ]
- bytecode, _ = self.get_by_bytecode("FOR_ITER", True) # second bytecode is the end of the loop
- assert bytecode.get_opnames("guard") == [
- "guard_value",
- "guard_class", # check the class of the iterator
- "guard_nonnull", # check that the iterator is not finished
- "guard_isnull", # check that the range list is not forced
- "guard_false", # check that the index is lower than the current length
- ]
-
- bytecode, = self.get_by_bytecode("BINARY_SUBSCR")
- assert bytecode.get_opnames("guard") == [
- "guard_false", # check that the index is >= 0
- "guard_false", # check that the index is lower than the current length
- ]
- bytecode, _ = self.get_by_bytecode("FOR_ITER") # second bytecode is the end of the loop
- assert bytecode.get_opnames("guard") == [
- "guard_false", # check that the index is lower than the current length
- ]
-
- def test_exception_inside_loop_1(self):
- self.run_source('''
- def main(n):
- while n:
- try:
- raise ValueError
- except ValueError:
- pass
- n -= 1
- return n
- ''', 33,
- ([30], 0))
-
- bytecode, = self.get_by_bytecode("SETUP_EXCEPT")
- #assert not bytecode.get_opnames("new") -- currently, we have
- # new_with_vtable(pypy.interpreter.pyopcode.ExceptBlock)
- bytecode, = self.get_by_bytecode("RAISE_VARARGS")
- assert not bytecode.get_opnames("new")
- bytecode, = self.get_by_bytecode("COMPARE_OP")
- assert not bytecode.get_opnames()
-
- def test_exception_inside_loop_2(self):
- self.run_source('''
- def g(n):
- raise ValueError(n)
- def f(n):
- g(n)
- def main(n):
- while n:
- try:
- f(n)
- except ValueError:
- pass
- n -= 1
- return n
- ''', 51,
- ([30], 0))
-
- bytecode, = self.get_by_bytecode("RAISE_VARARGS")
- assert not bytecode.get_opnames("new")
- bytecode, = self.get_by_bytecode("COMPARE_OP")
- assert len(bytecode.get_opnames()) <= 2 # oois, guard_true
-
- def test_chain_of_guards(self):
- self.run_source('''
- class A(object):
- def method_x(self):
- return 3
-
- l = ["x", "y"]
-
- def main(arg):
- sum = 0
- a = A()
- i = 0
- while i < 2000:
- name = l[arg]
- sum += getattr(a, 'method_' + name)()
- i += 1
- return sum
- ''', 3000, ([0], 2000*3))
- assert len(self.loops) == 1
-
- def test_getattr_with_dynamic_attribute(self):
- self.run_source('''
- class A(object):
- pass
-
- l = ["x", "y"]
-
- def main(arg):
- sum = 0
- a = A()
- a.a1 = 0
- a.a2 = 0
- a.a3 = 0
- a.a4 = 0
- a.a5 = 0 # workaround, because the first five attributes need a promotion
- a.x = 1
- a.y = 2
- i = 0
- while i < 2000:
- name = l[i % 2]
- sum += getattr(a, name)
- i += 1
- return sum
- ''', 3000, ([0], 3000))
- assert len(self.loops) == 1
-
- def test_blockstack_virtualizable(self):
- self.run_source('''
- from pypyjit import residual_call
-
- def main():
- i = 0
- while i < 100:
- try:
- residual_call(len, [])
- except:
- pass
- i += 1
- return i
- ''', 1000, ([], 100))
- bytecode, = self.get_by_bytecode("CALL_FUNCTION")
- # we allocate virtual ref and frame, we don't want block
- assert len(bytecode.get_opnames('new_with_vtable')) == 2
-
- def test_import_in_function(self):
- self.run_source('''
- def main():
- i = 0
- while i < 100:
- from sys import version
- i += 1
- return i
- ''', 100, ([], 100))
- bytecode, = self.get_by_bytecode('IMPORT_NAME')
- bytecode2, = self.get_by_bytecode('IMPORT_FROM')
- assert len(bytecode.get_opnames('call')) == 2 # split_chr and list_pop
- assert len(bytecode2.get_opnames('call')) == 0
-
- def test_arraycopy_disappears(self):
- self.run_source('''
- def main():
- i = 0
- while i < 100:
- t = (1, 2, 3, i + 1)
- t2 = t[:]
- del t
- i = t2[3]
- del t2
- return i
- ''', 40, ([], 100))
- bytecode, = self.get_by_bytecode('BINARY_SUBSCR')
- assert len(bytecode.get_opnames('new_array')) == 0
def test_overflow_checking(self):
startvalue = sys.maxint - 2147483647
@@ -784,514 +223,6 @@
return total
''' % startvalue, 170, ([], startvalue + 4999450000L))
- def test_boolrewrite_invers(self):
- for a, b, res, ops in (('2000', '2000', 20001000, 51),
- ( '500', '500', 15001500, 81),
- ( '300', '600', 16001700, 83),
- ( 'a', 'b', 16001700, 89),
- ( 'a', 'a', 13001700, 85)):
-
- self.run_source('''
- def main():
- sa = 0
- a = 300
- b = 600
- for i in range(1000):
- if i < %s: sa += 1
- else: sa += 2
- if i >= %s: sa += 10000
- else: sa += 20000
- return sa
- '''%(a, b), ops, ([], res))
-
- def test_boolrewrite_reflex(self):
- for a, b, res, ops in (('2000', '2000', 10001000, 51),
- ( '500', '500', 15001500, 81),
- ( '300', '600', 14001700, 83),
- ( 'a', 'b', 14001700, 89),
- ( 'a', 'a', 17001700, 85)):
-
- self.run_source('''
- def main():
- sa = 0
- a = 300
- b = 600
- for i in range(1000):
- if i < %s: sa += 1
- else: sa += 2
- if %s > i: sa += 10000
- else: sa += 20000
- return sa
- '''%(a, b), ops, ([], res))
-
-
- def test_boolrewrite_correct_invers(self):
- def opval(i, op, a):
- if eval('%d %s %d' % (i, op, a)): return 1
- return 2
-
- ops = ('<', '>', '<=', '>=', '==', '!=')
- for op1 in ops:
- for op2 in ops:
- for a,b in ((500, 500), (300, 600)):
- res = 0
- res += opval(a-1, op1, a) * (a)
- res += opval( a, op1, a)
- res += opval(a+1, op1, a) * (1000 - a - 1)
- res += opval(b-1, op2, b) * 10000 * (b)
- res += opval( b, op2, b) * 10000
- res += opval(b+1, op2, b) * 10000 * (1000 - b - 1)
-
- self.run_source('''
- def main():
- sa = 0
- for i in range(1000):
- if i %s %d: sa += 1
- else: sa += 2
- if i %s %d: sa += 10000
- else: sa += 20000
- return sa
- '''%(op1, a, op2, b), 83, ([], res))
-
- self.run_source('''
- def main():
- sa = 0
- i = 0.0
- while i < 250.0:
- if i %s %f: sa += 1
- else: sa += 2
- if i %s %f: sa += 10000
- else: sa += 20000
- i += 0.25
- return sa
- '''%(op1, float(a)/4.0, op2, float(b)/4.0), 156, ([], res))
-
-
- def test_boolrewrite_correct_reflex(self):
- def opval(i, op, a):
- if eval('%d %s %d' % (i, op, a)): return 1
- return 2
-
- ops = ('<', '>', '<=', '>=', '==', '!=')
- for op1 in ops:
- for op2 in ops:
- for a,b in ((500, 500), (300, 600)):
- res = 0
- res += opval(a-1, op1, a) * (a)
- res += opval( a, op1, a)
- res += opval(a+1, op1, a) * (1000 - a - 1)
- res += opval(b, op2, b-1) * 10000 * (b)
- res += opval(b, op2, b) * 10000
- res += opval(b, op2, b+1) * 10000 * (1000 - b - 1)
-
- self.run_source('''
- def main():
- sa = 0
- for i in range(1000):
- if i %s %d: sa += 1
- else: sa += 2
- if %d %s i: sa += 10000
- else: sa += 20000
- return sa
- '''%(op1, a, b, op2), 83, ([], res))
-
- self.run_source('''
- def main():
- sa = 0
- i = 0.0
- while i < 250.0:
- if i %s %f: sa += 1
- else: sa += 2
- if %f %s i: sa += 10000
- else: sa += 20000
- i += 0.25
- return sa
- '''%(op1, float(a)/4.0, float(b)/4.0, op2), 156, ([], res))
-
- def test_boolrewrite_ptr(self):
- # XXX this test is way too imprecise in what it is actually testing
- # it should count the number of guards instead
- compares = ('a == b', 'b == a', 'a != b', 'b != a', 'a == c', 'c != b')
- for e1 in compares:
- for e2 in compares:
- a, b, c = 1, 2, 3
- if eval(e1): res = 752 * 1
- else: res = 752 * 2
- if eval(e2): res += 752 * 10000
- else: res += 752 * 20000
- a = b
- if eval(e1): res += 248 * 1
- else: res += 248 * 2
- if eval(e2): res += 248 * 10000
- else: res += 248 * 20000
-
-
- if 'c' in e1 or 'c' in e2:
- n = 337
- else:
- n = 215
-
- print
- print 'Test:', e1, e2, n, res
- self.run_source('''
- class tst(object):
- pass
- def main():
- a = tst()
- b = tst()
- c = tst()
- sa = 0
- for i in range(1000):
- if %s: sa += 1
- else: sa += 2
- if %s: sa += 10000
- else: sa += 20000
- if i > 750: a = b
- return sa
- '''%(e1, e2), n, ([], res))
-
- def test_array_sum(self):
- for tc, maxops in zip('bhilBHILfd', (38,) * 6 + (40, 40, 41, 38)):
- res = 19352859
- if tc == 'L':
- res = long(res)
- elif tc in 'fd':
- res = float(res)
- elif tc == 'I' and sys.maxint == 2147483647:
- res = long(res)
- # note: in CPython we always get longs here, even on 64-bits
-
- self.run_source('''
- from array import array
-
- def main():
- img = array("%s", range(127) * 5) * 484
- l, i = 0, 0
- while i < 640 * 480:
- l += img[i]
- i += 1
- return l
- ''' % tc, maxops, ([], res))
-
- def test_array_sum_char(self):
- self.run_source('''
- from array import array
-
- def main():
- img = array("c", "Hello") * 130 * 480
- l, i = 0, 0
- while i < 640 * 480:
- l += ord(img[i])
- i += 1
- return l
- ''', 60, ([], 30720000))
-
- def test_array_sum_unicode(self):
- self.run_source('''
- from array import array
-
- def main():
- img = array("u", u"Hello") * 130 * 480
- l, i = 0, 0
- while i < 640 * 480:
- if img[i] == u"l":
- l += 1
- i += 1
- return l
- ''', 65, ([], 122880))
-
- def test_array_intimg(self):
- # XXX this test is way too imprecise in what it is actually testing
- # it should count the number of guards instead
- for tc, maxops in zip('ilILd', (67, 67, 70, 70, 61)):
- print
- print '='*65
- print '='*20, 'running test for tc=%r' % (tc,), '='*20
- res = 73574560
- if tc == 'L':
- res = long(res)
- elif tc in 'fd':
- res = float(res)
- elif tc == 'I' and sys.maxint == 2147483647:
- res = long(res)
- # note: in CPython we always get longs here, even on 64-bits
-
- self.run_source('''
- from array import array
-
- def main(tc):
- img = array(tc, range(3)) * (350 * 480)
- intimg = array(tc, (0,)) * (640 * 480)
- l, i = 0, 640
- while i < 640 * 480:
- l = l + img[i]
- intimg[i] = (intimg[i-640] + l)
- i += 1
- return intimg[i - 1]
- ''', maxops, ([tc], res))
-
- def test_unpackiterable(self):
- self.run_source('''
- from array import array
-
- def main():
- i = 0
- t = array('l', (1, 2))
- while i < 2000:
- a, b = t
- i += 1
- return 3
-
- ''', 100, ([], 3))
- bytecode, = self.get_by_bytecode("UNPACK_SEQUENCE")
- # we allocate virtual ref and frame, we don't want block
- assert len(bytecode.get_opnames('call_may_force')) == 0
-
-
- def test_intbound_simple(self):
- ops = ('<', '>', '<=', '>=', '==', '!=')
- nbr = (3, 7)
- for o1 in ops:
- for o2 in ops:
- for n1 in nbr:
- for n2 in nbr:
- src = '''
- def f(i):
- a, b = 3, 3
- if i %s %d:
- a = 0
- else:
- a = 1
- if i %s %d:
- b = 0
- else:
- b = 1
- return a + b * 2
-
- def main():
- res = [0] * 4
- idx = []
- for i in range(15):
- idx.extend([i] * 1500)
- for i in idx:
- res[f(i)] += 1
- return res
-
- ''' % (o1, n1, o2, n2)
-
- exec(str(py.code.Source(src)))
- res = [0] * 4
- for i in range(15):
- res[f(i)] += 1500
- self.run_source(src, 268, ([], res))
-
- def test_intbound_addsub_mix(self):
- tests = ('i > 4', 'i > 2', 'i + 1 > 2', '1 + i > 4',
- 'i - 1 > 1', '1 - i > 1', '1 - i < -3',
- 'i == 1', 'i == 5', 'i != 1', '-2 * i < -4')
- for t1 in tests:
- for t2 in tests:
- print t1, t2
- src = '''
- def f(i):
- a, b = 3, 3
- if %s:
- a = 0
- else:
- a = 1
- if %s:
- b = 0
- else:
- b = 1
- return a + b * 2
-
- def main():
- res = [0] * 4
- idx = []
- for i in range(15):
- idx.extend([i] * 1500)
- for i in idx:
- res[f(i)] += 1
- return res
-
- ''' % (t1, t2)
-
- exec(str(py.code.Source(src)))
- res = [0] * 4
- for i in range(15):
- res[f(i)] += 1500
- self.run_source(src, 280, ([], res))
-
- def test_intbound_gt(self):
- self.run_source('''
- def main():
- i, a, b = 0, 0, 0
- while i < 2000:
- if i > -1:
- a += 1
- if i > -2:
- b += 1
- i += 1
- return (a, b)
- ''', 48, ([], (2000, 2000)))
-
- def test_intbound_sub_lt(self):
- self.run_source('''
- def main():
- i, a, b = 0, 0, 0
- while i < 2000:
- if i - 10 < 1995:
- a += 1
- i += 1
- return (a, b)
- ''', 38, ([], (2000, 0)))
-
- def test_intbound_addsub_ge(self):
- self.run_source('''
- def main():
- i, a, b = 0, 0, 0
- while i < 2000:
- if i + 5 >= 5:
- a += 1
- if i - 1 >= -1:
- b += 1
- i += 1
- return (a, b)
- ''', 56, ([], (2000, 2000)))
-
- def test_intbound_addmul_ge(self):
- self.run_source('''
- def main():
- i, a, b = 0, 0, 0
- while i < 2000:
- if i + 5 >= 5:
- a += 1
- if 2 * i >= 0:
- b += 1
- i += 1
- return (a, b)
- ''', 53, ([], (2000, 2000)))
-
- def test_intbound_eq(self):
- self.run_source('''
- def main(a):
- i, s = 0, 0
- while i < 1500:
- if a == 7:
- s += a + 1
- elif i == 10:
- s += i
- else:
- s += 1
- i += 1
- return s
- ''', 69, ([7], 12000), ([42], 1509), ([10], 1509))
-
- def test_intbound_mul(self):
- self.run_source('''
- def main(a):
- i, s = 0, 0
- while i < 1500:
- assert i >= 0
- if 2 * i < 30000:
- s += 1
- else:
- s += a
- i += 1
- return s
- ''', 43, ([7], 1500))
-
- def test_assert(self):
- self.run_source('''
- def main(a):
- i, s = 0, 0
- while i < 1500:
- assert a == 7
- s += a + 1
- i += 1
- return s
- ''', 38, ([7], 8*1500))
-
- def test_zeropadded(self):
- self.run_source('''
- from array import array
- class ZeroPadded(array):
- def __new__(cls, l):
- self = array.__new__(cls, 'd', range(l))
- return self
-
- def __getitem__(self, i):
- if i < 0 or i >= self.__len__():
- return 0
- return array.__getitem__(self, i)
-
-
- def main():
- buf = ZeroPadded(2000)
- i = 10
- sa = 0
- while i < 2000 - 10:
- sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
- i += 1
- return sa
-
- ''', 232, ([], 9895050.0))
-
- def test_circular(self):
- self.run_source('''
- from array import array
- class Circular(array):
- def __new__(cls):
- self = array.__new__(cls, 'd', range(256))
- return self
- def __getitem__(self, i):
- # assert self.__len__() == 256 (FIXME: does not improve)
- return array.__getitem__(self, i & 255)
-
- def main():
- buf = Circular()
- i = 10
- sa = 0
- while i < 2000 - 10:
- sa += buf[i-2] + buf[i-1] + buf[i] + buf[i+1] + buf[i+2]
- i += 1
- return sa
-
- ''', 170, ([], 1239690.0))
-
- def test_min_max(self):
- self.run_source('''
- def main():
- i=0
- sa=0
- while i < 2000:
- sa+=min(max(i, 3000), 4000)
- i+=1
- return sa
- ''', 51, ([], 2000*3000))
-
- def test_silly_max(self):
- self.run_source('''
- def main():
- i=2
- sa=0
- while i < 2000:
- sa+=max(*range(i))
- i+=1
- return sa
- ''', 125, ([], 1997001))
-
- def test_iter_max(self):
- self.run_source('''
- def main():
- i=2
- sa=0
- while i < 2000:
- sa+=max(range(i))
- i+=1
- return sa
- ''', 88, ([], 1997001))
-
def test__ffi_call(self):
from pypy.rlib.test.test_libffi import get_libm_name
libm_name = get_libm_name(sys.platform)
diff --git a/pypy/rpython/lltypesystem/test/test_rffi.py b/pypy/rpython/lltypesystem/test/test_rffi.py
--- a/pypy/rpython/lltypesystem/test/test_rffi.py
+++ b/pypy/rpython/lltypesystem/test/test_rffi.py
@@ -728,6 +728,7 @@
for ll, ctp in cache.items():
assert sizeof(ll) == ctypes.sizeof(ctp)
+ assert sizeof(lltype.Typedef(ll, 'test')) == sizeof(ll)
assert not size_and_sign(lltype.Signed)[1]
assert not size_and_sign(lltype.Char)[1]
assert not size_and_sign(lltype.UniChar)[1]
diff --git a/pypy/module/cpyext/test/test_api.py b/pypy/module/cpyext/test/test_api.py
--- a/pypy/module/cpyext/test/test_api.py
+++ b/pypy/module/cpyext/test/test_api.py
@@ -70,11 +70,35 @@
if self.check_and_print_leaks():
assert False, "Test leaks or loses object(s)."
+@api.cpython_api([api.Py_ssize_t], api.Py_ssize_t, error=-1)
+def PyPy_TypedefTest1(space, arg):
+ assert lltype.typeOf(arg) == api.Py_ssize_t
+ return 0
+
+@api.cpython_api([api.Py_ssize_tP], api.Py_ssize_tP)
+def PyPy_TypedefTest2(space, arg):
+ assert lltype.typeOf(arg) == api.Py_ssize_tP
+ return None
+
class TestConversion(BaseApiTest):
def test_conversions(self, space, api):
api.PyPy_GetWrapped(space.w_None)
api.PyPy_GetReference(space.w_None)
+ def test_typedef(self, space):
+ from pypy.translator.c.database import LowLevelDatabase
+ db = LowLevelDatabase()
+ assert (api.c_function_signature(db, api.FUNCTIONS['PyPy_TypedefTest1'])
+ == ('Py_ssize_t', 'Py_ssize_t arg0'))
+ assert (api.c_function_signature(db, api.FUNCTIONS['PyPy_TypedefTest2'])
+ == ('Py_ssize_t *', 'Py_ssize_t *arg0'))
+
+ PyPy_TypedefTest1(space, 0)
+ ppos = lltype.malloc(api.Py_ssize_tP.TO, 1, flavor='raw')
+ ppos[0] = 0
+ PyPy_TypedefTest2(space, ppos)
+ lltype.free(ppos, flavor='raw')
+
def test_copy_header_files(tmpdir):
api.copy_header_files(tmpdir)
diff --git a/pypy/rpython/lltypesystem/module/test/test_ll_math.py b/pypy/rpython/lltypesystem/module/test/test_ll_math.py
--- a/pypy/rpython/lltypesystem/module/test/test_ll_math.py
+++ b/pypy/rpython/lltypesystem/module/test/test_ll_math.py
@@ -3,6 +3,7 @@
from pypy.rpython.lltypesystem.module import ll_math
from pypy.module.math.test.test_direct import MathTests, get_tester
+from pypy.translator.c.test.test_genc import compile
class TestMath(MathTests):
@@ -21,6 +22,13 @@
assert ll_math.ll_math_isnan(nan)
assert not ll_math.ll_math_isnan(inf)
+ def test_compiled_isinf(self):
+ def f(x):
+ return ll_math.ll_math_isinf(1. / x)
+ f = compile(f, [float], backendopt=False)
+ assert f(5.5e-309)
+
+
def make_test_case((fnname, args, expected), dict):
#
def test_func(self):
More information about the Pypy-commit
mailing list