[pypy-commit] pypy default: Tweak the RPython and PyPy ord() to behave like CPython's when given strange inputs
arigo
noreply at buildbot.pypy.org
Mon Sep 28 10:35:21 CEST 2015
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r79878:47c87779c73a
Date: 2015-09-28 10:35 +0200
http://bitbucket.org/pypy/pypy/changeset/47c87779c73a/
Log: Tweak the RPython and PyPy ord() to behave like CPython's when given
strange inputs: never return negative numbers on 64-bit. Also fix
the repr() of unicodes containing such a character. (Tested in the
array module because it's hard to make invalid unichars otherwise.)
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -844,6 +844,18 @@
b.byteswap()
assert a != b
+ def test_unicode_ord_positive(self):
+ import sys
+ if sys.maxunicode == 0xffff:
+ skip("test for 32-bit unicodes")
+ a = self.array('u', '\xff\xff\xff\xff')
+ assert len(a) == 1
+ assert repr(a[0]) == "u'\Uffffffff'"
+ if sys.maxint == 2147483647:
+ assert ord(a[0]) == -1
+ else:
+ assert ord(a[0]) == 4294967295
+
def test_weakref(self):
import weakref
a = self.array('c', 'Hi!')
diff --git a/rpython/annotator/unaryop.py b/rpython/annotator/unaryop.py
--- a/rpython/annotator/unaryop.py
+++ b/rpython/annotator/unaryop.py
@@ -652,11 +652,11 @@
def len(self):
return immutablevalue(1)
+class __extend__(SomeChar):
+
def ord(self):
return SomeInteger(nonneg=True)
-class __extend__(SomeChar):
-
def method_isspace(self):
return s_Bool
@@ -675,6 +675,13 @@
def method_upper(self):
return self
+class __extend__(SomeUnicodeCodePoint):
+
+ def ord(self):
+ # warning, on 32-bit with 32-bit unichars, this might return
+ # negative numbers
+ return SomeInteger()
+
class __extend__(SomeIterator):
def iter(self):
diff --git a/rpython/jit/metainterp/test/test_ajit.py b/rpython/jit/metainterp/test/test_ajit.py
--- a/rpython/jit/metainterp/test/test_ajit.py
+++ b/rpython/jit/metainterp/test/test_ajit.py
@@ -4320,14 +4320,14 @@
self.meta_interp(allfuncs, [9, 2000])
- def test_unichar_might_be_signed(self):
- py.test.skip("wchar_t is sometimes a signed 32-bit integer type, "
- "but RPython inteprets it as unsigned (but still "
- "translates to wchar_t, so can create confusion)")
+ def test_unichar_ord_is_never_signed_on_64bit(self):
+ import sys
+ if sys.maxunicode == 0xffff:
+ py.test.skip("test for 32-bit unicodes")
def f(x):
- return rffi.cast(lltype.Signed, rffi.cast(lltype.UniChar, x))
+ return ord(rffi.cast(lltype.UniChar, x))
res = self.interp_operations(f, [-1])
- if rffi.r_wchar_t.SIGN:
+ if sys.maxint == 2147483647:
assert res == -1
else:
- assert res == 2 ** 16 - 1 or res == 2 ** 32 - 1
+ assert res == 4294967295
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1403,11 +1403,10 @@
result.append(CHR(quote))
return result.build()
+ TABLE = STR('0123456789abcdef')
+
def char_escape_helper(result, char):
- num = hex(char)
- if STR is unicode:
- num = num.decode('ascii')
- if char >= 0x10000:
+ if char >= 0x10000 or char < 0:
result.append(STR("\\U"))
zeros = 8
elif char >= 0x100:
@@ -1416,11 +1415,8 @@
else:
result.append(STR("\\x"))
zeros = 2
- lnum = len(num)
- nb = zeros + 2 - lnum # num starts with '0x'
- if nb > 0:
- result.append_multiple_char(STR('0'), nb)
- result.append_slice(num, 2, lnum)
+ for i in range(zeros-1, -1, -1):
+ result.append(TABLE[(char >> (4 * i)) & 0x0f])
return unicode_escape, char_escape_helper
diff --git a/rpython/translator/c/src/int.h b/rpython/translator/c/src/int.h
--- a/rpython/translator/c/src/int.h
+++ b/rpython/translator/c/src/int.h
@@ -231,8 +231,12 @@
#define OP_TRUNCATE_LONGLONG_TO_INT(x,r) r = (Signed)(x)
#define OP_TRUNCATE_LONGLONGLONG_TO_INT(x,r) r = (Signed)(x)
-#define OP_CAST_UNICHAR_TO_INT(x,r) r = (Signed)((Unsigned)(x)) /*?*/
-#define OP_CAST_INT_TO_UNICHAR(x,r) r = (unsigned int)(x)
+/* Casting from UniChar to int goes first via "unsigned int".
+ On 64-bit platforms, this forces a signed 32-bit wchar_t
+ to an unsigned integer, which is also what CPython's ord()
+ does. */
+#define OP_CAST_UNICHAR_TO_INT(x,r) r = ((unsigned int)(x))
+#define OP_CAST_INT_TO_UNICHAR(x,r) r = (x)
/* bool operations */
More information about the pypy-commit mailing list