[pypy-commit] pypy default: Unicode characters out of range(0x110000): fix a few docstrings, and try to more systematically test (and fix) various corner cases
arigo
pypy.commits at gmail.com
Wed Mar 27 07:26:34 EDT 2019
Author: Armin Rigo <arigo at tunes.org>
Branch:
Changeset: r96359:a59c3b47eec9
Date: 2019-03-27 12:26 +0100
http://bitbucket.org/pypy/pypy/changeset/a59c3b47eec9/
Log: Unicode characters out of range(0x110000): fix a few docstrings, and
try to more systematically test (and fix) various corner cases
diff --git a/pypy/interpreter/unicodehelper.py b/pypy/interpreter/unicodehelper.py
--- a/pypy/interpreter/unicodehelper.py
+++ b/pypy/interpreter/unicodehelper.py
@@ -537,14 +537,17 @@
def wcharpsize2utf8(space, wcharp, size):
"""Safe version of rffi.wcharpsize2utf8.
- Raises app-level rutf8.OutOfRange if any wchar value is outside the valid
+ Raises app-level ValueError if any wchar value is outside the valid
codepoint range.
"""
try:
return rffi.wcharpsize2utf8(wcharp, size)
except rutf8.OutOfRange as e:
- raise oefmt(space.w_ValueError,
- "character %s is not in range [U+0000; U+10ffff]", 'U+%x' % e.code)
+ raise wrap_unicode_out_of_range_error(space, e)
+
+def wrap_unicode_out_of_range_error(space, e):
+ raise oefmt(space.w_ValueError,
+ "character %s is not in range [U+0000; U+10ffff]", 'U+%x' % e.code)
# ____________________________________________________________
diff --git a/pypy/module/_rawffi/interp_rawffi.py b/pypy/module/_rawffi/interp_rawffi.py
--- a/pypy/module/_rawffi/interp_rawffi.py
+++ b/pypy/module/_rawffi/interp_rawffi.py
@@ -5,6 +5,8 @@
from pypy.interpreter.gateway import interp2app, unwrap_spec
from pypy.interpreter.typedef import interp_attrproperty
from pypy.interpreter.typedef import TypeDef, GetSetProperty
+from pypy.interpreter.unicodehelper import wcharpsize2utf8
+from pypy.interpreter.unicodehelper import wrap_unicode_out_of_range_error
from rpython.rlib.clibffi import *
from rpython.rtyper.lltypesystem import lltype, rffi
@@ -596,10 +598,13 @@
if address == 0:
return space.w_None
wcharp_addr = rffi.cast(rffi.CWCHARP, address)
- if maxlength == -1:
- s, lgt = rffi.wcharp2utf8(wcharp_addr)
- else:
- s, lgt = rffi.wcharp2utf8n(wcharp_addr, maxlength)
+ try:
+ if maxlength == -1:
+ s, lgt = rffi.wcharp2utf8(wcharp_addr)
+ else:
+ s, lgt = rffi.wcharp2utf8n(wcharp_addr, maxlength)
+ except rutf8.OutOfRange as e:
+ raise wrap_unicode_out_of_range_error(space, e)
return space.newutf8(s, lgt)
@unwrap_spec(address=r_uint, maxlength=int)
@@ -613,7 +618,7 @@
def wcharp2rawunicode(space, address, maxlength=-1):
if maxlength == -1:
return wcharp2unicode(space, address)
- s = rffi.wcharpsize2utf8(rffi.cast(rffi.CWCHARP, address), maxlength)
+ s = wcharpsize2utf8(space, rffi.cast(rffi.CWCHARP, address), maxlength)
return space.newutf8(s, maxlength)
@unwrap_spec(address=r_uint, newcontent='bufferstr')
diff --git a/pypy/module/_rawffi/test/test__rawffi.py b/pypy/module/_rawffi/test/test__rawffi.py
--- a/pypy/module/_rawffi/test/test__rawffi.py
+++ b/pypy/module/_rawffi/test/test__rawffi.py
@@ -1229,6 +1229,23 @@
lib = _rawffi.CDLL(self.lib_name)
assert lib.name == self.lib_name
+ def test_wcharp2rawunicode(self):
+ import _rawffi
+ A = _rawffi.Array('i')
+ arg = A(1)
+ arg[0] = 0x1234
+ u = _rawffi.wcharp2rawunicode(arg.itemaddress(0))
+ assert u == u'\u1234'
+ u = _rawffi.wcharp2rawunicode(arg.itemaddress(0), 1)
+ assert u == u'\u1234'
+ arg[0] = -1
+ raises(ValueError, _rawffi.wcharp2rawunicode, arg.itemaddress(0))
+ raises(ValueError, _rawffi.wcharp2rawunicode, arg.itemaddress(0), 1)
+ arg[0] = 0x110000
+ raises(ValueError, _rawffi.wcharp2rawunicode, arg.itemaddress(0))
+ raises(ValueError, _rawffi.wcharp2rawunicode, arg.itemaddress(0), 1)
+ arg.free()
+
class AppTestAutoFree:
spaceconfig = dict(usemodules=['_rawffi', 'struct'])
diff --git a/pypy/module/array/interp_array.py b/pypy/module/array/interp_array.py
--- a/pypy/module/array/interp_array.py
+++ b/pypy/module/array/interp_array.py
@@ -13,6 +13,7 @@
interp2app, interpindirect2app, unwrap_spec)
from pypy.interpreter.typedef import (
GetSetProperty, TypeDef, make_weakref_descr)
+from pypy.interpreter.unicodehelper import wcharpsize2utf8
from pypy.module._file.interp_file import W_File
@@ -463,7 +464,8 @@
"""
if self.typecode == 'u':
buf = rffi.cast(UNICODE_ARRAY, self._buffer_as_unsigned())
- return space.newutf8(rffi.wcharpsize2utf8(buf, self.len), self.len)
+ utf8 = wcharpsize2utf8(space, buf, self.len)
+ return space.newutf8(utf8, self.len)
else:
raise oefmt(space.w_ValueError,
"tounicode() may only be called on type 'u' arrays")
@@ -714,8 +716,15 @@
s = "array('%s', %s)" % (self.typecode, space.text_w(r))
return space.newtext(s)
elif self.typecode == "u":
- r = space.repr(self.descr_tounicode(space))
- s = "array('%s', %s)" % (self.typecode, space.text_w(r))
+ try:
+ w_unicode = self.descr_tounicode(space)
+ except OperationError as e:
+ if not e.match(space, space.w_ValueError):
+ raise
+ r = '<character out of range>'
+ else:
+ r = space.text_w(space.repr(w_unicode))
+ s = "array('%s', %s)" % (self.typecode, r)
return space.newtext(s)
else:
r = space.repr(self.descr_tolist(space))
diff --git a/pypy/module/array/test/test_array.py b/pypy/module/array/test/test_array.py
--- a/pypy/module/array/test/test_array.py
+++ b/pypy/module/array/test/test_array.py
@@ -847,10 +847,15 @@
assert repr(mya('i', (1, 2, 3))) == "array('i', [1, 2, 3])"
def test_unicode_outofrange(self):
- a = self.array('u', u'\x01\u263a\x00\ufeff')
- b = self.array('u', u'\x01\u263a\x00\ufeff')
+ input_unicode = u'\x01\u263a\x00\ufeff'
+ a = self.array('u', input_unicode)
+ b = self.array('u', input_unicode)
b.byteswap()
assert a != b
+ assert str(a) == "array('u', %r)" % (input_unicode,)
+ assert str(b) == "array('u', <character out of range>)"
+ assert a.tounicode() == input_unicode
+ raises(ValueError, b.tounicode) # doesn't work
def test_weakref(self):
import weakref
diff --git a/rpython/rtyper/lltypesystem/rffi.py b/rpython/rtyper/lltypesystem/rffi.py
--- a/rpython/rtyper/lltypesystem/rffi.py
+++ b/rpython/rtyper/lltypesystem/rffi.py
@@ -1023,7 +1023,7 @@
def wcharpsize2utf8(w, size):
""" Helper to convert WCHARP pointer to utf8 in one go.
Equivalent to wcharpsize2unicode().encode("utf8")
- Raises ValueError if characters are outside range(0x110000)!
+ Raises rutf8.OutOfRange if characters are outside range(0x110000)!
"""
from rpython.rlib import rutf8
@@ -1033,6 +1033,9 @@
return s.build()
def wcharp2utf8(w):
+ """
+ Raises rutf8.OutOfRange if characters are outside range(0x110000)!
+ """
from rpython.rlib import rutf8
s = rutf8.Utf8StringBuilder()
@@ -1043,6 +1046,9 @@
return s.build(), i
def wcharp2utf8n(w, maxlen):
+ """
+ Raises rutf8.OutOfRange if characters are outside range(0x110000)!
+ """
from rpython.rlib import rutf8
s = rutf8.Utf8StringBuilder(maxlen)
More information about the pypy-commit mailing list