[pypy-commit] pypy unicode-utf8: (fijal, arigo)
arigo
pypy.commits at gmail.com
Thu Aug 24 08:50:48 EDT 2017
Author: Armin Rigo <arigo at tunes.org>
Branch: unicode-utf8
Changeset: r92249:c9a84142d1e3
Date: 2017-08-24 14:50 +0200
http://bitbucket.org/pypy/pypy/changeset/c9a84142d1e3/
Log: (fijal, arigo)
Fix the gateway logic: an unwrap_spec entry of 'utf8' now yields just a
UTF-8-encoded string (no separate length argument).
diff --git a/pypy/interpreter/gateway.py b/pypy/interpreter/gateway.py
--- a/pypy/interpreter/gateway.py
+++ b/pypy/interpreter/gateway.py
@@ -160,6 +160,9 @@
def visit_text0(self, el, app_sig):
self.checked_space_method(el, app_sig)
+ def visit_utf8(self, el, app_sig):
+ self.checked_space_method(el, app_sig)
+
def visit_fsencode(self, el, app_sig):
self.checked_space_method(el, app_sig)
@@ -244,7 +247,6 @@
def __init__(self):
UnwrapSpecEmit.__init__(self)
self.run_args = []
- self.extracode = []
def scopenext(self):
return "scope_w[%d]" % self.succ()
@@ -305,6 +307,9 @@
def visit_text0(self, typ):
self.run_args.append("space.text0_w(%s)" % (self.scopenext(),))
+ def visit_utf8(self, typ):
+ self.run_args.append("space.utf8_w(%s)" % (self.scopenext(),))
+
def visit_fsencode(self, typ):
self.run_args.append("space.fsencode_w(%s)" % (self.scopenext(),))
@@ -359,9 +364,8 @@
d = {}
source = """if 1:
def _run(self, space, scope_w):
- %s
return self.behavior(%s)
- \n""" % ("\n".join(self.extracode), ', '.join(self.run_args))
+ \n""" % (', '.join(self.run_args),)
exec compile2(source) in self.miniglobals, d
activation_cls = type("BuiltinActivation_UwS_%s" % label,
@@ -402,7 +406,6 @@
UnwrapSpecEmit.__init__(self)
self.args = []
self.unwrap = []
- self.extracode = []
self.finger = 0
def dispatch(self, el, *args):
@@ -472,6 +475,9 @@
def visit_text0(self, typ):
self.unwrap.append("space.text0_w(%s)" % (self.nextarg(),))
+ def visit_utf8(self, typ):
+ self.unwrap.append("space.utf8_w(%s)" % (self.nextarg(),))
+
def visit_fsencode(self, typ):
self.unwrap.append("space.fsencode_w(%s)" % (self.nextarg(),))
@@ -526,10 +532,9 @@
unwrap_info.miniglobals['func'] = func
source = """if 1:
def fastfunc_%s_%d(%s):
- %s
return func(%s)
\n""" % (func.__name__.replace('-', '_'), narg,
- ', '.join(args), '\n'.join(unwrap_info.extracode),
+ ', '.join(args),
', '.join(unwrap_info.unwrap))
exec compile2(source) in unwrap_info.miniglobals, d
fastfunc = d['fastfunc_%s_%d' % (func.__name__.replace('-', '_'), narg)]
diff --git a/pypy/interpreter/test/test_gateway.py b/pypy/interpreter/test/test_gateway.py
--- a/pypy/interpreter/test/test_gateway.py
+++ b/pypy/interpreter/test/test_gateway.py
@@ -538,8 +538,8 @@
def test_interp2app_unwrap_spec_utf8(self):
space = self.space
w = space.wrap
- def g3_u(space, utf8, utf8len):
- return space.newtuple([space.wrap(len(utf8)), space.wrap(utf8len)])
+ def g3_u(space, utf8):
+ return space.wrap(utf8)
app_g3_u = gateway.interp2app_temp(g3_u,
unwrap_spec=[gateway.ObjSpace,
'utf8'])
@@ -547,14 +547,20 @@
encoded = u"gęść".encode('utf8')
assert self.space.eq_w(
space.call_function(w_app_g3_u, w(u"gęść")),
- space.newtuple([w(len(encoded)), w(4)]))
+ w(encoded))
assert self.space.eq_w(
space.call_function(w_app_g3_u, w("foo")),
- space.newtuple([w(3), w(3)]))
+ w("foo"))
raises(gateway.OperationError, space.call_function, w_app_g3_u,
w(None))
raises(gateway.OperationError, space.call_function, w_app_g3_u,
w(42))
+ w_ascii = space.appexec([], """():
+ import sys
+ return sys.getdefaultencoding() == 'ascii'""")
+ if space.is_true(w_ascii):
+ raises(gateway.OperationError, space.call_function, w_app_g3_u,
+ w("\x80"))
def test_interp2app_unwrap_spec_unwrapper(self):
space = self.space
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -374,10 +374,10 @@
def make_encoder_wrapper(name):
rname = "utf8_encode_%s" % (name.replace("_encode", ""), )
- XXX
@unwrap_spec(utf8='utf8', errors='text_or_none')
def wrap_encoder(space, utf8, utf8len, errors="strict"):
from pypy.interpreter import unicodehelper
+ XXX
if errors is None:
errors = 'strict'
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -831,7 +831,8 @@
s = space.charbuf_w(w_obj)
try:
rutf8.check_ascii(s)
- except rutf8.AsciiCheckError as e:
+ except rutf8.CheckError:
+ XXX
unicodehelper.decode_error_handler(space)(None,
'ascii', "ordinal not in range(128)", s, e.pos, e.pos+1)
assert False
@@ -842,7 +843,8 @@
try:
_, lgt = rutf8.str_check_utf8(s, len(s), final=True,
allow_surrogates=True)
- except rutf8.Utf8CheckError as e:
+ except rutf8.CheckError:
+ XXX
eh(None, 'utf8', e.msg, s, e.startpos, e.endpos)
assert False, "has to raise"
return space.newutf8(s, lgt)
More information about the pypy-commit
mailing list