[pypy-svn] r75146 - in pypy/trunk: . pypy/annotation pypy/annotation/test pypy/module/_codecs pypy/module/sys pypy/objspace/std pypy/rpython pypy/rpython/lltypesystem pypy/rpython/test
fijal at codespeak.net
fijal at codespeak.net
Sun Jun 6 17:34:23 CEST 2010
Author: fijal
Date: Sun Jun 6 17:34:21 2010
New Revision: 75146
Modified:
pypy/trunk/ (props changed)
pypy/trunk/pypy/annotation/binaryop.py
pypy/trunk/pypy/annotation/builtin.py
pypy/trunk/pypy/annotation/classdef.py
pypy/trunk/pypy/annotation/test/test_annrpython.py
pypy/trunk/pypy/module/_codecs/interp_codecs.py
pypy/trunk/pypy/module/sys/__init__.py
pypy/trunk/pypy/module/sys/interp_encoding.py
pypy/trunk/pypy/objspace/std/unicodetype.py
pypy/trunk/pypy/rpython/lltypesystem/rstr.py
pypy/trunk/pypy/rpython/rbuiltin.py
pypy/trunk/pypy/rpython/test/test_rbuiltin.py
Log:
Merge the encodings-unfreeze branch. This should put an end to the freezing
of the encodings module from lib-python.
Modified: pypy/trunk/pypy/annotation/binaryop.py
==============================================================================
--- pypy/trunk/pypy/annotation/binaryop.py (original)
+++ pypy/trunk/pypy/annotation/binaryop.py Sun Jun 6 17:34:21 2010
@@ -442,7 +442,7 @@
return SomeUnicodeString()
class __extend__(pairtype(SomeString, SomeUnicodeString),
- pairtype(SomeString, SomeUnicodeString)):
+ pairtype(SomeUnicodeString, SomeString)):
def mod((str, unistring)):
raise NotImplementedError(
"string formatting mixing strings and unicode not supported")
Modified: pypy/trunk/pypy/annotation/builtin.py
==============================================================================
--- pypy/trunk/pypy/annotation/builtin.py (original)
+++ pypy/trunk/pypy/annotation/builtin.py Sun Jun 6 17:34:21 2010
@@ -272,6 +272,12 @@
def OSError_init(s_self, *args):
pass
+def UnicodeDecodeError_init(s_self, *args):
+ pass
+
+def UnicodeEncodeError_init(s_self, *args):
+ pass
+
def WindowsError_init(s_self, *args):
pass
@@ -390,6 +396,8 @@
BUILTIN_ANALYZERS[getattr(OSError.__init__, 'im_func', OSError.__init__)] = (
OSError_init)
+BUILTIN_ANALYZERS[getattr(UnicodeDecodeError.__init__, 'im_func', UnicodeDecodeError.__init__)] = UnicodeDecodeError_init
+BUILTIN_ANALYZERS[getattr(UnicodeEncodeError.__init__, 'im_func', UnicodeEncodeError.__init__)] = UnicodeEncodeError_init
try:
WindowsError
Modified: pypy/trunk/pypy/annotation/classdef.py
==============================================================================
--- pypy/trunk/pypy/annotation/classdef.py (original)
+++ pypy/trunk/pypy/annotation/classdef.py Sun Jun 6 17:34:21 2010
@@ -3,7 +3,7 @@
"""
from pypy.annotation.model import SomePBC, s_ImpossibleValue, unionof
from pypy.annotation.model import SomeInteger, isdegenerated, SomeTuple,\
- SomeString
+ SomeString, SomeUnicodeString
from pypy.annotation import description
@@ -439,6 +439,16 @@
FORCE_ATTRIBUTES_INTO_CLASSES = {
OSError: {'errno': SomeInteger()},
+ UnicodeDecodeError: {'end': SomeInteger(),
+ 'start': SomeInteger(),
+ 'object': SomeString(),
+ 'encoding': SomeString(),
+ 'reason': SomeString()},
+ UnicodeEncodeError: {'end': SomeInteger(),
+ 'start': SomeInteger(),
+ 'object': SomeUnicodeString(),
+ 'encoding': SomeString(),
+ 'reason': SomeString()}
}
try:
Modified: pypy/trunk/pypy/annotation/test/test_annrpython.py
==============================================================================
--- pypy/trunk/pypy/annotation/test/test_annrpython.py (original)
+++ pypy/trunk/pypy/annotation/test/test_annrpython.py Sun Jun 6 17:34:21 2010
@@ -3321,6 +3321,17 @@
s = a.build_types(g, [int])
assert a.bookkeeper.getdesc(f).getuniquegraph()
+ def test_unicode_decode_error(self):
+ def f():
+ try:
+ raise UnicodeDecodeError("x", "x", 0, 1, "reason")
+ except UnicodeDecodeError, ude:
+ return ude.end
+
+ a = self.RPythonAnnotator()
+ s = a.build_types(f, [])
+ assert isinstance(s, annmodel.SomeInteger)
+
def g(n):
return [0,1,2,n]
Modified: pypy/trunk/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/interp_codecs.py (original)
+++ pypy/trunk/pypy/module/_codecs/interp_codecs.py Sun Jun 6 17:34:21 2010
@@ -2,6 +2,7 @@
from pypy.interpreter.gateway import ObjSpace, NoneNotWrapped, applevel
from pypy.interpreter.baseobjspace import W_Root
from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
+from pypy.rlib.objectmodel import we_are_translated
class CodecState(object):
def __init__(self, space):
@@ -52,6 +53,9 @@
return replace, newpos
return unicode_call_errorhandler
+ def _freeze_(self):
+ assert not self.codec_search_path
+ return False
def register_codec(space, w_search_function):
"""register(search_function)
@@ -75,6 +79,8 @@
Looks up a codec tuple in the Python codec registry and returns
a tuple of functions.
"""
+ assert not (space.config.translating and not we_are_translated()), \
+ "lookup_codec() should not be called during translation"
state = space.fromcache(CodecState)
normalized_encoding = encoding.replace(" ", "-").lower()
w_result = state.codec_search_cache.get(normalized_encoding, None)
@@ -215,11 +221,8 @@
else:
encoding = space.str_w(w_encoding)
w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
- if space.is_true(w_encoder):
- w_res = space.call_function(w_encoder, w_obj, space.wrap(errors))
- return space.getitem(w_res, space.wrap(0))
- else:
- assert 0, "XXX, what to do here?"
+ w_res = space.call_function(w_encoder, w_obj, space.wrap(errors))
+ return space.getitem(w_res, space.wrap(0))
encode.unwrap_spec = [ObjSpace, W_Root, W_Root, str]
def buffer_encode(space, s, errors='strict'):
Modified: pypy/trunk/pypy/module/sys/__init__.py
==============================================================================
--- pypy/trunk/pypy/module/sys/__init__.py (original)
+++ pypy/trunk/pypy/module/sys/__init__.py Sun Jun 6 17:34:21 2010
@@ -1,5 +1,6 @@
from pypy.interpreter.mixedmodule import MixedModule
from pypy.interpreter.error import OperationError
+from pypy.rlib.objectmodel import we_are_translated
import sys
class Module(MixedModule):
@@ -92,8 +93,12 @@
self.space.setitem(w_modules, w_name, w_module)
def startup(self, space):
- from pypy.module.sys.interp_encoding import _getfilesystemencoding
- self.filesystemencoding = _getfilesystemencoding(space)
+ if space.config.translating and not we_are_translated():
+ # don't get the filesystemencoding at translation time
+ assert self.filesystemencoding is None
+ else:
+ from pypy.module.sys.interp_encoding import _getfilesystemencoding
+ self.filesystemencoding = _getfilesystemencoding(space)
def getmodule(self, name):
space = self.space
Modified: pypy/trunk/pypy/module/sys/interp_encoding.py
==============================================================================
--- pypy/trunk/pypy/module/sys/interp_encoding.py (original)
+++ pypy/trunk/pypy/module/sys/interp_encoding.py Sun Jun 6 17:34:21 2010
@@ -1,5 +1,6 @@
import sys
from pypy.rlib import rlocale
+from pypy.rlib.objectmodel import we_are_translated
def getdefaultencoding(space):
"""Return the current default string encoding used by the Unicode
@@ -18,6 +19,8 @@
space.sys.defaultencoding = encoding
def get_w_default_encoder(space):
+ assert not (space.config.translating and not we_are_translated()), \
+ "get_w_default_encoder() should not be called during translation"
w_encoding = space.wrap(space.sys.defaultencoding)
mod = space.getbuiltinmodule("_codecs")
w_lookup = space.getattr(mod, space.wrap("lookup"))
@@ -40,6 +43,8 @@
rlocale.setlocale(rlocale.LC_CTYPE, "")
loc_codeset = rlocale.nl_langinfo(rlocale.CODESET)
if loc_codeset:
+ if loc_codeset == 'ANSI_X3.4-1968':
+ loc_codeset = 'ascii'
codecmod = space.getbuiltinmodule('_codecs')
w_res = space.call_function(space.getattr(codecmod,
space.wrap('lookup')),
Modified: pypy/trunk/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/unicodetype.py (original)
+++ pypy/trunk/pypy/objspace/std/unicodetype.py Sun Jun 6 17:34:21 2010
@@ -3,6 +3,8 @@
from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
from pypy.objspace.std.register_all import register_all
from pypy.objspace.std.basestringtype import basestring_typedef
+from pypy.rlib.runicode import str_decode_utf_8, str_decode_ascii,\
+ unicode_encode_utf_8, unicode_encode_ascii
from sys import maxint
@@ -183,13 +185,27 @@
return encoding, errors
def encode_object(space, w_object, encoding, errors):
- # XXX write down shortcuts for performance for common encodings,
- # just like CPython
if encoding is None:
# Get the encoder functions as a wrapped object.
# This lookup is cached.
w_encoder = space.sys.get_w_default_encoder()
else:
+ if errors is None or errors == 'strict':
+ try:
+ if encoding == 'ascii':
+ u = space.unicode_w(w_object)
+ return space.wrap(unicode_encode_ascii(u, len(u), None))
+ if encoding == 'utf-8':
+ u = space.unicode_w(w_object)
+ return space.wrap(unicode_encode_utf_8(u, len(u), None))
+ except UnicodeEncodeError, uee:
+ raise OperationError(space.w_UnicodeEncodeError,
+ space.newtuple([
+ space.wrap(uee.encoding),
+ space.wrap(uee.object),
+ space.wrap(uee.start),
+ space.wrap(uee.end),
+ space.wrap(uee.reason)]))
from pypy.module._codecs.interp_codecs import lookup_codec
w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
if errors is None:
@@ -205,9 +221,23 @@
return w_retval
def decode_object(space, w_obj, encoding, errors):
- w_codecs = space.getbuiltinmodule("_codecs")
if encoding is None:
encoding = getdefaultencoding(space)
+ if errors is None or errors == 'strict':
+ try:
+ if encoding == 'ascii':
+ # XXX error handling
+ s = space.bufferstr_w(w_obj)
+ return space.wrap(str_decode_ascii(s, len(s), None)[0])
+ if encoding == 'utf-8':
+ s = space.bufferstr_w(w_obj)
+ return space.wrap(str_decode_utf_8(s, len(s), None)[0])
+ except UnicodeDecodeError, ude:
+ raise OperationError(space.w_UnicodeDecodeError, space.newtuple(
+ [space.wrap(ude.encoding), space.wrap(ude.object),
+ space.wrap(ude.start), space.wrap(ude.end),
+ space.wrap(ude.reason)]))
+ w_codecs = space.getbuiltinmodule("_codecs")
w_decode = space.getattr(w_codecs, space.wrap("decode"))
if errors is None:
w_retval = space.call_function(w_decode, w_obj, space.wrap(encoding))
Modified: pypy/trunk/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/rstr.py (original)
+++ pypy/trunk/pypy/rpython/lltypesystem/rstr.py Sun Jun 6 17:34:21 2010
@@ -18,6 +18,7 @@
from pypy.rpython.rmodel import Repr
from pypy.rpython.lltypesystem import llmemory
from pypy.tool.sourcetools import func_with_new_name
+from pypy.rpython.annlowlevel import hlstr
# ____________________________________________________________
#
@@ -296,7 +297,7 @@
s = mallocunicode(lgt)
for i in range(lgt):
if ord(str.chars[i]) > 127:
- raise UnicodeDecodeError
+ raise UnicodeDecodeError("ascii", hlstr(str), 0, lgt, "ascii codec can't encode %d" % ord(str.chars[i]))
s.chars[i] = cast_primitive(UniChar, str.chars[i])
return s
ll_str2unicode.oopspec = 'str.str2unicode(str)'
Modified: pypy/trunk/pypy/rpython/rbuiltin.py
==============================================================================
--- pypy/trunk/pypy/rpython/rbuiltin.py (original)
+++ pypy/trunk/pypy/rpython/rbuiltin.py Sun Jun 6 17:34:21 2010
@@ -270,6 +270,37 @@
v_errno = hop.inputarg(lltype.Signed, arg=1)
r_self.setfield(v_self, 'errno', v_errno, hop.llops)
+def rtype_UnicodeDecodeError_init(hop):
+ if hop.nb_args != 6:
+ raise TypeError("UnicodeDecodeError() should be called with 5 "
+ "arguments")
+ r_self = hop.args_r[0]
+ r_str = hop.rtyper.type_system.rstr.string_repr
+ TPS = [hop.args_r[0], r_str, r_str, lltype.Signed, lltype.Signed,
+ r_str]
+ v_self, v_encoding, v_obj, v_start, v_end, v_msg = hop.inputargs(*TPS)
+ r_self.setfield(v_self, 'encoding', v_encoding, hop.llops)
+ r_self.setfield(v_self, 'object', v_obj, hop.llops)
+ r_self.setfield(v_self, 'start', v_start, hop.llops)
+ r_self.setfield(v_self, 'end', v_end, hop.llops)
+ r_self.setfield(v_self, 'reason', v_msg, hop.llops)
+
+def rtype_UnicodeEncodeError_init(hop):
+ if hop.nb_args != 6:
+ raise TypeError("UnicodeEncodeError() should be called with 5 "
+ "arguments")
+ r_self = hop.args_r[0]
+ r_str = hop.rtyper.type_system.rstr.string_repr
+ r_unicode = hop.rtyper.type_system.rstr.unicode_repr
+ TPS = [hop.args_r[0], r_str, r_unicode, lltype.Signed, lltype.Signed,
+ r_str]
+ v_self, v_encoding, v_obj, v_start, v_end, v_msg = hop.inputargs(*TPS)
+ r_self.setfield(v_self, 'encoding', v_encoding, hop.llops)
+ r_self.setfield(v_self, 'object', v_obj, hop.llops)
+ r_self.setfield(v_self, 'start', v_start, hop.llops)
+ r_self.setfield(v_self, 'end', v_end, hop.llops)
+ r_self.setfield(v_self, 'reason', v_msg, hop.llops)
+
def rtype_WindowsError__init__(hop):
if hop.nb_args == 2:
raise TyperError("WindowsError() should not be called with "
@@ -329,6 +360,8 @@
BUILTIN_TYPER[getattr(OSError.__init__, 'im_func', OSError.__init__)] = (
rtype_OSError__init__)
+BUILTIN_TYPER[getattr(UnicodeDecodeError.__init__, 'im_func', UnicodeDecodeError.__init__)] = rtype_UnicodeDecodeError_init
+BUILTIN_TYPER[getattr(UnicodeEncodeError.__init__, 'im_func', UnicodeEncodeError.__init__)] = rtype_UnicodeEncodeError_init
try:
WindowsError
Modified: pypy/trunk/pypy/rpython/test/test_rbuiltin.py
==============================================================================
--- pypy/trunk/pypy/rpython/test/test_rbuiltin.py (original)
+++ pypy/trunk/pypy/rpython/test/test_rbuiltin.py Sun Jun 6 17:34:21 2010
@@ -496,6 +496,35 @@
res = self.interpret(llf, [rffi.r_short(123)], policy=LowLevelAnnotatorPolicy())
assert res == 123
+ def test_unicode_errors(self):
+ def f():
+ try:
+ raise UnicodeDecodeError("xx", "x", 0, 1, "reason")
+ except UnicodeDecodeError, ude:
+ assert ude.start == 0
+ assert ude.encoding == "xx"
+ assert ude.object == "x"
+ assert ude.start == 0
+ assert ude.reason == "reason"
+ return ude.end
+
+ res = self.interpret(f, [])
+ assert res == f()
+
+ def f():
+ try:
+ raise UnicodeEncodeError("xx", u"x", 0, 1, "reason")
+ except UnicodeEncodeError, ude:
+ assert ude.start == 0
+ assert ude.encoding == "xx"
+ assert ude.object == u"x"
+ assert ude.start == 0
+ assert ude.reason == "reason"
+ return ude.end
+
+ res = self.interpret(f, [])
+ assert res == f()
+
class TestLLtype(BaseTestRbuiltin, LLRtypeMixin):
def test_isinstance_obj(self):
More information about the Pypy-commit
mailing list