[pypy-svn] r48588 - in pypy/branch/more-unicode-improvements/pypy/objspace/std: . test
cfbolz at codespeak.net
cfbolz at codespeak.net
Mon Nov 12 00:16:12 CET 2007
Author: cfbolz
Date: Mon Nov 12 00:16:11 2007
New Revision: 48588
Modified:
pypy/branch/more-unicode-improvements/pypy/objspace/std/stringobject.py
pypy/branch/more-unicode-improvements/pypy/objspace/std/test/test_stringobject.py
pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodeobject.py
pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodetype.py
Log:
move string.encode and string.decode to interp-level. share code where possible.
Modified: pypy/branch/more-unicode-improvements/pypy/objspace/std/stringobject.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/objspace/std/stringobject.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/objspace/std/stringobject.py Mon Nov 12 00:16:11 2007
@@ -51,7 +51,6 @@
# XXX should this use the default encoding?
return _decode_ascii(space, w_self._value)
-
def _is_generic(space, w_self, fun):
v = w_self._value
if len(v) == 0:
@@ -905,29 +904,25 @@
L = [ table[ord(s[i])] for i in range(len(s)) if s[i] not in deletechars ]
return ''.join(L)
- def str_decode__String_ANY_ANY(str, encoding=None, errors=None):
- import codecs
- if encoding is None and errors is None:
- return unicode(str)
- elif errors is None:
- return codecs.getdecoder(encoding)(str)[0]
- else:
- return codecs.getdecoder(encoding)(str, errors)[0]
-
- def str_encode__String_ANY_ANY(str, encoding=None, errors=None):
- import codecs
- if encoding is None and errors is None:
- return unicode(str)
- elif errors is None:
- return codecs.getencoder(encoding)(str)[0]
- else:
- return codecs.getencoder(encoding)(str, errors)[0]
''', filename=__file__)
str_translate__String_ANY_ANY = app.interphook('str_translate__String_ANY_ANY')
-str_decode__String_ANY_ANY = app.interphook('str_decode__String_ANY_ANY')
-str_encode__String_ANY_ANY = app.interphook('str_encode__String_ANY_ANY')
+
+def str_decode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
+    """Interp-level implementation of str.decode([encoding[, errors]]).
+
+    With neither encoding nor errors given, the string is converted with
+    unicode_from_string(); otherwise the work is delegated to
+    decode_object() from unicodetype.
+    """
+    from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+        unicode_from_string, decode_object
+    encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
+    if encoding is None and errors is None:
+        return unicode_from_string(space, w_string)
+    # Decoding has to go through the decoder: calling encode_object() here
+    # only appeared to work for symmetric codecs such as rot-13.
+    return decode_object(space, w_string, encoding, errors)
+
+def str_encode__String_ANY_ANY(space, w_string, w_encoding=None, w_errors=None):
+ # Interp-level str.encode(): unwrap the arguments, then delegate to encode_object().
+ from pypy.objspace.std.unicodetype import _get_encoding_and_errors, \
+ encode_object
+ encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
+ return encode_object(space, w_string, encoding, errors)
# CPython's logic for deciding if ""%values is
# an error (1 value, 0 %-formatters) or not
Modified: pypy/branch/more-unicode-improvements/pypy/objspace/std/test/test_stringobject.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/objspace/std/test/test_stringobject.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/objspace/std/test/test_stringobject.py Mon Nov 12 00:16:11 2007
@@ -645,6 +645,10 @@
def test_decode(self):
assert 'hello'.decode('rot-13') == 'uryyb'
assert 'hello'.decode('string-escape') == 'hello'
+
+ def test_encode(self):
+ # encode() without arguments gives back an equal value of exact type str
+ assert 'hello'.encode() == 'hello'
+ assert type('hello'.encode()) is str
def test_hash(self):
# check that we have the same hash as CPython for at least 31 bits
Modified: pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodeobject.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodeobject.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodeobject.py Mon Nov 12 00:16:11 2007
@@ -739,24 +739,12 @@
w_encoding=None,
w_errors=None):
- from pypy.objspace.std.unicodetype import getdefaultencoding
- from pypy.objspace.std.unicodetype import _get_encoding_and_errors
- w_codecs = space.getbuiltinmodule("_codecs")
+ from pypy.objspace.std.unicodetype import getdefaultencoding, \
+ _get_encoding_and_errors, encode_object
encoding, errors = _get_encoding_and_errors(space, w_encoding, w_errors)
if encoding is None:
encoding = getdefaultencoding(space)
- w_encode = space.getattr(w_codecs, space.wrap("encode"))
- if errors is None:
- w_retval = space.call_function(w_encode, w_unistr, space.wrap(encoding))
- else:
- w_retval = space.call_function(w_encode, w_unistr, space.wrap(encoding),
- space.wrap(errors))
- if not space.is_true(space.isinstance(w_retval, space.w_str)):
- raise OperationError(
- space.w_TypeError,
- space.wrap(
- "encoder did not return an string object (type=%s)" %
- space.type(w_retval).getname(space, '?')))
+ w_retval = encode_object(space, w_unistr, encoding, errors)
return w_retval
def unicode_partition__Unicode_Unicode(space, w_unistr, w_unisub):
Modified: pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodetype.py (original)
+++ pypy/branch/more-unicode-improvements/pypy/objspace/std/unicodetype.py Mon Nov 12 00:16:11 2007
@@ -145,7 +145,34 @@
def getdefaultencoding(space):
return space.sys.defaultencoding
-def unicode_from_encoded_object(space, w_obj, encoding, errors):
+def _get_encoding_and_errors(space, w_encoding, w_errors):
+    """Unwrap the optional encoding/errors arguments.
+
+    Returns an (encoding, errors) pair of interp-level strings; an argument
+    that is the wrapped None comes back as None.
+    """
+    encoding = None
+    if not space.is_w(w_encoding, space.w_None):
+        encoding = space.str_w(w_encoding)
+    errors = None
+    if not space.is_w(w_errors, space.w_None):
+        errors = space.str_w(w_errors)
+    return encoding, errors
+
+def encode_object(space, w_object, encoding, errors):
+    """Encode w_object by calling _codecs.encode() and return the result.
+
+    encoding and errors are interp-level strings or None.  A None encoding
+    is replaced by the default encoding, as decode_object() does; a None
+    errors is simply not passed on to the codec.
+
+    Raises an app-level TypeError if the codec does not return a str.
+    """
+    if encoding is None:
+        # str.encode() may be called without an explicit encoding.
+        encoding = getdefaultencoding(space)
+    w_codecs = space.getbuiltinmodule("_codecs")
+    w_encode = space.getattr(w_codecs, space.wrap("encode"))
+    if errors is None:
+        w_retval = space.call_function(w_encode, w_object, space.wrap(encoding))
+    else:
+        w_retval = space.call_function(w_encode, w_object, space.wrap(encoding),
+                                       space.wrap(errors))
+    if not space.is_true(space.isinstance(w_retval, space.w_str)):
+        raise OperationError(
+            space.w_TypeError,
+            space.wrap(
+                "encoder did not return a string object (type=%s)" %
+                space.type(w_retval).getname(space, '?')))
+    return w_retval
+
+def decode_object(space, w_obj, encoding, errors):
+ # The parameter is named w_obj because the call_function() lines below use that name.
w_codecs = space.getbuiltinmodule("_codecs")
if encoding is None:
encoding = getdefaultencoding(space)
@@ -155,6 +182,11 @@
else:
w_retval = space.call_function(w_decode, w_obj, space.wrap(encoding),
space.wrap(errors))
+ return w_retval
+
+
+def unicode_from_encoded_object(space, w_obj, encoding, errors):
+ w_retval = decode_object(space, w_obj, encoding, errors)
if not space.is_true(space.isinstance(w_retval, space.w_unicode)):
raise OperationError(
space.w_TypeError,
@@ -163,7 +195,6 @@
space.type(w_retval).getname(space, '?')))
return w_retval
-
def unicode_from_object(space, w_obj):
if space.is_true(space.isinstance(w_obj, space.w_str)):
w_res = w_obj
@@ -197,17 +228,6 @@
return unicode_from_object(space, w_str)
-def _get_encoding_and_errors(space, w_encoding, w_errors):
- if space.is_w(w_encoding, space.w_None):
- encoding = None
- else:
- encoding = space.str_w(w_encoding)
- if space.is_w(w_errors, space.w_None):
- errors = None
- else:
- errors = space.str_w(w_errors)
- return encoding, errors
-
def descr__new__(space, w_unicodetype, w_obj='', w_encoding=None, w_errors=None):
# NB. the default value of w_obj is really a *wrapped* empty string:
# there is gateway magic at work
More information about the Pypy-commit
mailing list