[pypy-svn] r75146 - in pypy/trunk: . pypy/annotation pypy/annotation/test pypy/module/_codecs pypy/module/sys pypy/objspace/std pypy/rpython pypy/rpython/lltypesystem pypy/rpython/test

Sun Jun 6 17:34:23 CEST 2010

Author: fijal
Date: Sun Jun  6 17:34:21 2010
New Revision: 75146

Modified:
   pypy/trunk/   (props changed)
   pypy/trunk/pypy/annotation/binaryop.py
   pypy/trunk/pypy/annotation/builtin.py
   pypy/trunk/pypy/annotation/classdef.py
   pypy/trunk/pypy/annotation/test/test_annrpython.py
   pypy/trunk/pypy/module/_codecs/interp_codecs.py
   pypy/trunk/pypy/module/sys/__init__.py
   pypy/trunk/pypy/module/sys/interp_encoding.py
   pypy/trunk/pypy/objspace/std/unicodetype.py
   pypy/trunk/pypy/rpython/lltypesystem/rstr.py
   pypy/trunk/pypy/rpython/rbuiltin.py
   pypy/trunk/pypy/rpython/test/test_rbuiltin.py
Log:
Merge the encodings-unfreeze branch. This should kill the freezing of the
encodings module from lib-python.


Modified: pypy/trunk/pypy/annotation/binaryop.py
==============================================================================

--- pypy/trunk/pypy/annotation/binaryop.py	(original)
+++ pypy/trunk/pypy/annotation/binaryop.py	Sun Jun  6 17:34:21 2010
@@ -442,7 +442,7 @@
         return SomeUnicodeString()
 
 class __extend__(pairtype(SomeString, SomeUnicodeString),
-                 pairtype(SomeString, SomeUnicodeString)):
+                 pairtype(SomeUnicodeString, SomeString)):
     def mod((str, unistring)):
         raise NotImplementedError(
             "string formatting mixing strings and unicode not supported")

Modified: pypy/trunk/pypy/annotation/builtin.py
==============================================================================
--- pypy/trunk/pypy/annotation/builtin.py	(original)
+++ pypy/trunk/pypy/annotation/builtin.py	Sun Jun  6 17:34:21 2010
@@ -272,6 +272,12 @@
 def OSError_init(s_self, *args):
     pass
 
+def UnicodeDecodeError_init(s_self, *args):
+    pass
+
+def UnicodeEncodeError_init(s_self, *args):
+    pass
+
 def WindowsError_init(s_self, *args):
     pass
 
@@ -390,6 +396,8 @@
 
 BUILTIN_ANALYZERS[getattr(OSError.__init__, 'im_func', OSError.__init__)] = (
     OSError_init)
+BUILTIN_ANALYZERS[getattr(UnicodeDecodeError.__init__, 'im_func', UnicodeDecodeError.__init__)] = UnicodeDecodeError_init
+BUILTIN_ANALYZERS[getattr(UnicodeEncodeError.__init__, 'im_func', UnicodeEncodeError.__init__)] = UnicodeEncodeError_init
 
 try:
     WindowsError

Modified: pypy/trunk/pypy/annotation/classdef.py
==============================================================================
--- pypy/trunk/pypy/annotation/classdef.py	(original)
+++ pypy/trunk/pypy/annotation/classdef.py	Sun Jun  6 17:34:21 2010
@@ -3,7 +3,7 @@
 """
 from pypy.annotation.model import SomePBC, s_ImpossibleValue, unionof
 from pypy.annotation.model import SomeInteger, isdegenerated, SomeTuple,\
-     SomeString
+     SomeString, SomeUnicodeString
 from pypy.annotation import description
 
 
@@ -439,6 +439,16 @@
 
 FORCE_ATTRIBUTES_INTO_CLASSES = {
     OSError: {'errno': SomeInteger()},
+    UnicodeDecodeError: {'end': SomeInteger(),
+                         'start': SomeInteger(),
+                         'object': SomeString(),
+                         'encoding': SomeString(),
+                         'reason': SomeString()},
+    UnicodeEncodeError: {'end': SomeInteger(),
+                         'start': SomeInteger(),
+                         'object': SomeUnicodeString(),
+                         'encoding': SomeString(),
+                         'reason': SomeString()}
     }
 
 try:

Modified: pypy/trunk/pypy/annotation/test/test_annrpython.py
==============================================================================
--- pypy/trunk/pypy/annotation/test/test_annrpython.py	(original)
+++ pypy/trunk/pypy/annotation/test/test_annrpython.py	Sun Jun  6 17:34:21 2010
@@ -3321,6 +3321,17 @@
         s = a.build_types(g, [int])
         assert a.bookkeeper.getdesc(f).getuniquegraph()
 
+    def test_unicode_decode_error(self):
+        def f():
+            try:
+                raise UnicodeDecodeError("x", "x", 0, 1, "reason")
+            except UnicodeDecodeError, ude:
+                return ude.end
+
+        a = self.RPythonAnnotator()
+        s = a.build_types(f, [])
+        assert isinstance(s, annmodel.SomeInteger)
+
 def g(n):
     return [0,1,2,n]
 

Modified: pypy/trunk/pypy/module/_codecs/interp_codecs.py
==============================================================================
--- pypy/trunk/pypy/module/_codecs/interp_codecs.py	(original)
+++ pypy/trunk/pypy/module/_codecs/interp_codecs.py	Sun Jun  6 17:34:21 2010
@@ -2,6 +2,7 @@
 from pypy.interpreter.gateway import ObjSpace, NoneNotWrapped, applevel
 from pypy.interpreter.baseobjspace import W_Root
 from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
+from pypy.rlib.objectmodel import we_are_translated
 
 class CodecState(object):
     def __init__(self, space):
@@ -52,6 +53,9 @@
                 return replace, newpos
         return unicode_call_errorhandler
 
+    def _freeze_(self):
+        assert not self.codec_search_path
+        return False
 
 def register_codec(space, w_search_function):
     """register(search_function)
@@ -75,6 +79,8 @@
     Looks up a codec tuple in the Python codec registry and returns
     a tuple of functions.
     """
+    assert not (space.config.translating and not we_are_translated()), \
+        "lookup_codec() should not be called during translation"
     state = space.fromcache(CodecState)
     normalized_encoding = encoding.replace(" ", "-").lower()    
     w_result = state.codec_search_cache.get(normalized_encoding, None)
@@ -215,11 +221,8 @@
     else:
         encoding = space.str_w(w_encoding)
     w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
-    if space.is_true(w_encoder):
-        w_res = space.call_function(w_encoder, w_obj, space.wrap(errors))
-        return space.getitem(w_res, space.wrap(0))
-    else:
-        assert 0, "XXX, what to do here?"
+    w_res = space.call_function(w_encoder, w_obj, space.wrap(errors))
+    return space.getitem(w_res, space.wrap(0))
 encode.unwrap_spec = [ObjSpace, W_Root, W_Root, str]
 
 def buffer_encode(space, s, errors='strict'):

Modified: pypy/trunk/pypy/module/sys/__init__.py
==============================================================================
--- pypy/trunk/pypy/module/sys/__init__.py	(original)
+++ pypy/trunk/pypy/module/sys/__init__.py	Sun Jun  6 17:34:21 2010
@@ -1,5 +1,6 @@
 from pypy.interpreter.mixedmodule import MixedModule
 from pypy.interpreter.error import OperationError
+from pypy.rlib.objectmodel import we_are_translated
 import sys
 
 class Module(MixedModule):
@@ -92,8 +93,12 @@
         self.space.setitem(w_modules, w_name, w_module)
 
     def startup(self, space):
-        from pypy.module.sys.interp_encoding import _getfilesystemencoding
-        self.filesystemencoding = _getfilesystemencoding(space)
+        if space.config.translating and not we_are_translated():
+            # don't get the filesystemencoding at translation time
+            assert self.filesystemencoding is None
+        else:
+            from pypy.module.sys.interp_encoding import _getfilesystemencoding
+            self.filesystemencoding = _getfilesystemencoding(space)
 
     def getmodule(self, name): 
         space = self.space

Modified: pypy/trunk/pypy/module/sys/interp_encoding.py
==============================================================================
--- pypy/trunk/pypy/module/sys/interp_encoding.py	(original)
+++ pypy/trunk/pypy/module/sys/interp_encoding.py	Sun Jun  6 17:34:21 2010
@@ -1,5 +1,6 @@
 import sys
 from pypy.rlib import rlocale
+from pypy.rlib.objectmodel import we_are_translated
 
 def getdefaultencoding(space):
     """Return the current default string encoding used by the Unicode 
@@ -18,6 +19,8 @@
     space.sys.defaultencoding = encoding
 
 def get_w_default_encoder(space):
+    assert not (space.config.translating and not we_are_translated()), \
+        "get_w_default_encoder() should not be called during translation"
     w_encoding = space.wrap(space.sys.defaultencoding)
     mod = space.getbuiltinmodule("_codecs")
     w_lookup = space.getattr(mod, space.wrap("lookup"))
@@ -40,6 +43,8 @@
         rlocale.setlocale(rlocale.LC_CTYPE, "")
         loc_codeset = rlocale.nl_langinfo(rlocale.CODESET)
         if loc_codeset:
+            if loc_codeset == 'ANSI_X3.4-1968':
+                loc_codeset = 'ascii'
             codecmod = space.getbuiltinmodule('_codecs')
             w_res = space.call_function(space.getattr(codecmod,
                                                       space.wrap('lookup')),

Modified: pypy/trunk/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/unicodetype.py	(original)
+++ pypy/trunk/pypy/objspace/std/unicodetype.py	Sun Jun  6 17:34:21 2010
@@ -3,6 +3,8 @@
 from pypy.objspace.std.stdtypedef import StdTypeDef, SMM
 from pypy.objspace.std.register_all import register_all
 from pypy.objspace.std.basestringtype import basestring_typedef
+from pypy.rlib.runicode import str_decode_utf_8, str_decode_ascii,\
+     unicode_encode_utf_8, unicode_encode_ascii
 
 from sys import maxint
 
@@ -183,13 +185,27 @@
     return encoding, errors
 
 def encode_object(space, w_object, encoding, errors):
-    # XXX write down shortcuts for performance for common encodings,
-    #     just like CPython
     if encoding is None:
         # Get the encoder functions as a wrapped object.
         # This lookup is cached.
         w_encoder = space.sys.get_w_default_encoder()
     else:
+        if errors is None or errors == 'strict':
+            try:
+                if encoding == 'ascii':
+                    u = space.unicode_w(w_object)
+                    return space.wrap(unicode_encode_ascii(u, len(u), None))
+                if encoding == 'utf-8':
+                    u = space.unicode_w(w_object)
+                    return space.wrap(unicode_encode_utf_8(u, len(u), None))
+            except UnicodeEncodeError, uee:
+                raise OperationError(space.w_UnicodeEncodeError,
+                                     space.newtuple([
+                                         space.wrap(uee.encoding),
+                                         space.wrap(uee.object),
+                                         space.wrap(uee.start),
+                                         space.wrap(uee.end),
+                                         space.wrap(uee.reason)]))
         from pypy.module._codecs.interp_codecs import lookup_codec
         w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
     if errors is None:
@@ -205,9 +221,23 @@
     return w_retval
 
 def decode_object(space, w_obj, encoding, errors):
-    w_codecs = space.getbuiltinmodule("_codecs")
     if encoding is None:
         encoding = getdefaultencoding(space)
+    if errors is None or errors == 'strict':
+        try:
+            if encoding == 'ascii':
+                # XXX error handling
+                s = space.bufferstr_w(w_obj)
+                return space.wrap(str_decode_ascii(s, len(s), None)[0])
+            if encoding == 'utf-8':
+                s = space.bufferstr_w(w_obj)
+                return space.wrap(str_decode_utf_8(s, len(s), None)[0])
+        except UnicodeDecodeError, ude:
+            raise OperationError(space.w_UnicodeDecodeError, space.newtuple(
+                [space.wrap(ude.encoding), space.wrap(ude.object),
+                 space.wrap(ude.start), space.wrap(ude.end),
+                 space.wrap(ude.reason)]))
+    w_codecs = space.getbuiltinmodule("_codecs")
     w_decode = space.getattr(w_codecs, space.wrap("decode"))
     if errors is None:
         w_retval = space.call_function(w_decode, w_obj, space.wrap(encoding))

Modified: pypy/trunk/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/rstr.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/rstr.py	Sun Jun  6 17:34:21 2010
@@ -18,6 +18,7 @@
 from pypy.rpython.rmodel import Repr
 from pypy.rpython.lltypesystem import llmemory
 from pypy.tool.sourcetools import func_with_new_name
+from pypy.rpython.annlowlevel import hlstr
 
 # ____________________________________________________________
 #
@@ -296,7 +297,7 @@
         s = mallocunicode(lgt)
         for i in range(lgt):
             if ord(str.chars[i]) > 127:
-                raise UnicodeDecodeError
+                raise UnicodeDecodeError("ascii", hlstr(str), 0, lgt, "ascii codec can't encode %d" % ord(str.chars[i]))
             s.chars[i] = cast_primitive(UniChar, str.chars[i])
         return s
     ll_str2unicode.oopspec = 'str.str2unicode(str)'

Modified: pypy/trunk/pypy/rpython/rbuiltin.py
==============================================================================
--- pypy/trunk/pypy/rpython/rbuiltin.py	(original)
+++ pypy/trunk/pypy/rpython/rbuiltin.py	Sun Jun  6 17:34:21 2010
@@ -270,6 +270,37 @@
         v_errno = hop.inputarg(lltype.Signed, arg=1)
         r_self.setfield(v_self, 'errno', v_errno, hop.llops)
 
+def rtype_UnicodeDecodeError_init(hop):
+    if hop.nb_args != 6:
+        raise TypeError("UnicodeDecodeError() should be called with 5 "
+                        "arguments")
+    r_self = hop.args_r[0]
+    r_str = hop.rtyper.type_system.rstr.string_repr
+    TPS = [hop.args_r[0], r_str, r_str, lltype.Signed, lltype.Signed,
+           r_str]
+    v_self, v_encoding, v_obj, v_start, v_end, v_msg = hop.inputargs(*TPS)
+    r_self.setfield(v_self, 'encoding', v_encoding, hop.llops)
+    r_self.setfield(v_self, 'object', v_obj, hop.llops)
+    r_self.setfield(v_self, 'start', v_start, hop.llops)
+    r_self.setfield(v_self, 'end', v_end, hop.llops)
+    r_self.setfield(v_self, 'reason', v_msg, hop.llops)
+
+def rtype_UnicodeEncodeError_init(hop):
+    if hop.nb_args != 6:
+        raise TypeError("UnicodeEncodeError() should be called with 5 "
+                        "arguments")
+    r_self = hop.args_r[0]
+    r_str = hop.rtyper.type_system.rstr.string_repr
+    r_unicode = hop.rtyper.type_system.rstr.unicode_repr
+    TPS = [hop.args_r[0], r_str, r_unicode, lltype.Signed, lltype.Signed,
+           r_str]
+    v_self, v_encoding, v_obj, v_start, v_end, v_msg = hop.inputargs(*TPS)
+    r_self.setfield(v_self, 'encoding', v_encoding, hop.llops)
+    r_self.setfield(v_self, 'object', v_obj, hop.llops)
+    r_self.setfield(v_self, 'start', v_start, hop.llops)
+    r_self.setfield(v_self, 'end', v_end, hop.llops)
+    r_self.setfield(v_self, 'reason', v_msg, hop.llops)
+
 def rtype_WindowsError__init__(hop):
     if hop.nb_args == 2:
         raise TyperError("WindowsError() should not be called with "
@@ -329,6 +360,8 @@
 
 BUILTIN_TYPER[getattr(OSError.__init__, 'im_func', OSError.__init__)] = (
     rtype_OSError__init__)
+BUILTIN_TYPER[getattr(UnicodeDecodeError.__init__, 'im_func', UnicodeDecodeError.__init__)] = rtype_UnicodeDecodeError_init
+BUILTIN_TYPER[getattr(UnicodeEncodeError.__init__, 'im_func', UnicodeEncodeError.__init__)] = rtype_UnicodeEncodeError_init
 
 try:
     WindowsError

Modified: pypy/trunk/pypy/rpython/test/test_rbuiltin.py
==============================================================================
--- pypy/trunk/pypy/rpython/test/test_rbuiltin.py	(original)
+++ pypy/trunk/pypy/rpython/test/test_rbuiltin.py	Sun Jun  6 17:34:21 2010
@@ -496,6 +496,35 @@
         res = self.interpret(llf, [rffi.r_short(123)], policy=LowLevelAnnotatorPolicy())
         assert res == 123
 
+    def test_unicode_errors(self):
+        def f():
+            try:
+                raise UnicodeDecodeError("xx", "x", 0, 1, "reason")
+            except UnicodeDecodeError, ude:
+                assert ude.start == 0
+                assert ude.encoding == "xx"
+                assert ude.object == "x"
+                assert ude.start == 0
+                assert ude.reason == "reason"
+                return ude.end
+
+        res = self.interpret(f, [])
+        assert res == f()
+
+        def f():
+            try:
+                raise UnicodeEncodeError("xx", u"x", 0, 1, "reason")
+            except UnicodeEncodeError, ude:
+                assert ude.start == 0
+                assert ude.encoding == "xx"
+                assert ude.object == u"x"
+                assert ude.start == 0
+                assert ude.reason == "reason"
+                return ude.end 
+
+        res = self.interpret(f, [])
+        assert res == f()        
+
 class TestLLtype(BaseTestRbuiltin, LLRtypeMixin):
 
     def test_isinstance_obj(self):