[pypy-commit] pypy default: reduce code duplication
mattip
pypy.commits at gmail.com
Tue Mar 12 04:03:08 EDT 2019
Author: Matti Picus <matti.picus at gmail.com>
Branch: default
Changeset: r96292:0a08f62b2c9f
Date: 2019-03-12 10:02 +0200
http://bitbucket.org/pypy/pypy/changeset/0a08f62b2c9f/
Log: reduce code duplication
diff --git a/pypy/module/_codecs/interp_codecs.py b/pypy/module/_codecs/interp_codecs.py
--- a/pypy/module/_codecs/interp_codecs.py
+++ b/pypy/module/_codecs/interp_codecs.py
@@ -306,8 +306,8 @@
return w_err_handler
- at unwrap_spec(errors='text')
-def encode(space, w_obj, w_encoding=None, errors='strict'):
+ at unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def encode(space, w_obj, encoding=None, errors=None):
"""encode(obj, [encoding[,errors]]) -> object
Encodes obj using the codec registered for encoding. encoding defaults
@@ -317,13 +317,19 @@
'xmlcharrefreplace' as well as any other name registered with
codecs.register_error that can handle ValueErrors.
"""
- if w_encoding is None:
+ if encoding is None:
encoding = space.sys.defaultencoding
+ w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
+ if errors:
+ w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
else:
- encoding = space.text_w(w_encoding)
- w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
- w_res = space.call_function(w_encoder, w_obj, space.newtext(errors))
- return space.getitem(w_res, space.newint(0))
+ w_res = space.call_function(w_encoder, w_obj)
+ w_retval = space.getitem(w_res, space.newint(0))
+ if not space.isinstance_w(w_retval, space.w_bytes):
+ raise oefmt(space.w_TypeError,
+ "encoder did not return an string object (type '%T')",
+ w_retval)
+ return w_retval
@unwrap_spec(errors='text_or_none')
def readbuffer_encode(space, w_data, errors='strict'):
@@ -335,8 +341,8 @@
s = space.getarg_w('t#', w_data)
return space.newtuple([space.newbytes(s), space.newint(len(s))])
- at unwrap_spec(errors='text')
-def decode(space, w_obj, w_encoding=None, errors='strict'):
+ at unwrap_spec(encoding='text_or_none', errors='text_or_none')
+def decode(space, w_obj, encoding=None, errors=None):
"""decode(obj, [encoding[,errors]]) -> object
Decodes obj using the codec registered for encoding. encoding defaults
@@ -346,19 +352,17 @@
as well as any other name registered with codecs.register_error that is
able to handle ValueErrors.
"""
- if w_encoding is None:
+ if encoding is None:
encoding = space.sys.defaultencoding
+ w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
+ if errors:
+ w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
else:
- encoding = space.text_w(w_encoding)
- w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
- if space.is_true(w_decoder):
- w_res = space.call_function(w_decoder, w_obj, space.newtext(errors))
- if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
- raise oefmt(space.w_TypeError,
- "encoder must return a tuple (object, integer)")
- return space.getitem(w_res, space.newint(0))
- else:
- assert 0, "XXX, what to do here?"
+ w_res = space.call_function(w_decoder, w_obj)
+ if (not space.isinstance_w(w_res, space.w_tuple) or space.len_w(w_res) != 2):
+ raise oefmt(space.w_TypeError,
+ "encoder must return a tuple (object, integer)")
+ return space.getitem(w_res, space.newint(0))
@unwrap_spec(errors='text')
def register_error(space, errors, w_handler):
diff --git a/pypy/module/_codecs/test/test_codecs.py b/pypy/module/_codecs/test/test_codecs.py
--- a/pypy/module/_codecs/test/test_codecs.py
+++ b/pypy/module/_codecs/test/test_codecs.py
@@ -468,8 +468,10 @@
return (encode_one, decode_one, None, None)
return None
_codecs.register(search_function)
- assert u"hello".encode("onearg") == 'foo'
- assert b"hello".decode("onearg") == 'foo'
+ assert u"hello".encode("onearg") == b'foo'
+ assert b"hello".decode("onearg") == u'foo'
+ assert _codecs.encode(u"hello", "onearg") == b'foo'
+ assert _codecs.decode(b"hello", "onearg") == u'foo'
def test_cpytest_decode(self):
import codecs
diff --git a/pypy/objspace/std/unicodeobject.py b/pypy/objspace/std/unicodeobject.py
--- a/pypy/objspace/std/unicodeobject.py
+++ b/pypy/objspace/std/unicodeobject.py
@@ -1071,16 +1071,17 @@
return encoding, errors
-def encode_object(space, w_object, encoding, errors):
- w_encoder = None
+def encode_object(space, w_obj, encoding, errors):
+ from pypy.module._codecs.interp_codecs import encode
if errors is None or errors == 'strict':
+ # fast path
if ((encoding is None and space.sys.defaultencoding == 'ascii') or
encoding == 'ascii'):
- s = space.utf8_w(w_object)
+ s = space.utf8_w(w_obj)
try:
rutf8.check_ascii(s)
except rutf8.CheckError as a:
- if space.isinstance_w(w_object, space.w_unicode):
+ if space.isinstance_w(w_obj, space.w_unicode):
eh = unicodehelper.encode_error_handler(space)
else:
# must be a bytes-like object. In order to encode it,
@@ -1093,32 +1094,17 @@
return space.newbytes(s)
if ((encoding is None and space.sys.defaultencoding == 'utf8') or
encoding == 'utf-8' or encoding == 'utf8' or encoding == 'UTF-8'):
- utf8 = space.utf8_w(w_object)
+ utf8 = space.utf8_w(w_obj)
if rutf8.has_surrogates(utf8):
utf8 = rutf8.reencode_utf8_with_surrogates(utf8)
return space.newbytes(utf8)
- if encoding is None:
- # Get the encoder functions as a wrapped object.
- # This lookup is cached.
- w_encoder = space.sys.get_w_default_encoder()
- if w_encoder is None:
- from pypy.module._codecs.interp_codecs import lookup_codec
- w_encoder = space.getitem(lookup_codec(space, encoding), space.newint(0))
- if errors is None:
- w_restuple = space.call_function(w_encoder, w_object)
- else:
- w_errors = space.newtext(errors)
- w_restuple = space.call_function(w_encoder, w_object, w_errors)
- w_retval = space.getitem(w_restuple, space.newint(0))
- if not space.isinstance_w(w_retval, space.w_bytes):
- raise oefmt(space.w_TypeError,
- "encoder did not return an string object (type '%T')",
- w_retval)
- return w_retval
+ return encode(space, w_obj, encoding, errors)
def decode_object(space, w_obj, encoding, errors):
+ from pypy.module._codecs.interp_codecs import lookup_codec, decode
if errors is None or errors == 'strict':
+ # fast paths
if encoding is None:
encoding = getdefaultencoding(space)
if encoding == 'ascii':
@@ -1133,20 +1119,9 @@
s = space.charbuf_w(w_obj)
lgt = unicodehelper.check_utf8_or_raise(space, s)
return space.newutf8(s, lgt)
- w_decoder = None
if encoding is None:
- # Get the decoder functions as a wrapped object.
- # This lookup is cached.
- w_decoder = space.sys.get_w_default_decoder()
- if w_decoder is None:
- from pypy.module._codecs.interp_codecs import lookup_codec
- w_decoder = space.getitem(lookup_codec(space, encoding), space.newint(1))
- if errors is None:
- w_retval = space.call_function(w_decoder, w_obj)
- else:
- w_retval = space.call_function(w_decoder, w_obj, space.newtext(errors))
- return space.getitem(w_retval, space.newint(0))
-
+ encoding = space.sys.defaultencoding
+ return decode(space, w_obj, encoding, errors)
def unicode_from_encoded_object(space, w_obj, encoding, errors):
# explicitly block bytearray on 2.7
More information about the pypy-commit mailing list