[pypy-svn] r75236 - in pypy/trunk/pypy: annotation annotation/test objspace/std rlib rpython rpython/lltypesystem rpython/test

Wed Jun 9 14:54:33 CEST 2010

Author: arigo
Date: Wed Jun  9 14:54:29 2010
New Revision: 75236

Modified:
   pypy/trunk/pypy/annotation/builtin.py
   pypy/trunk/pypy/annotation/classdef.py
   pypy/trunk/pypy/annotation/test/test_annrpython.py
   pypy/trunk/pypy/objspace/std/unicodetype.py
   pypy/trunk/pypy/rlib/runicode.py
   pypy/trunk/pypy/rpython/lltypesystem/rstr.py
   pypy/trunk/pypy/rpython/rbuiltin.py
   pypy/trunk/pypy/rpython/test/test_rbuiltin.py
Log:
Revert half of r75146, which was only half-done (see e.g. the test_runicode
failures).  I tried to finish it, but it turned into a growing mess.  Instead,
I reverted to Unicode{De,En}codeError being RPython exceptions with no
attributes, and now just pass an error handler when calling rlib/runicode
from objspace/std/unicodetype.py.  The error handler knows about the
space and directly raises the app-level exception.


Modified: pypy/trunk/pypy/annotation/builtin.py
==============================================================================

--- pypy/trunk/pypy/annotation/builtin.py	(original)
+++ pypy/trunk/pypy/annotation/builtin.py	Wed Jun  9 14:54:29 2010
@@ -272,12 +272,6 @@
 def OSError_init(s_self, *args):
     pass
 
-def UnicodeDecodeError_init(s_self, *args):
-    pass
-
-def UnicodeEncodeError_init(s_self, *args):
-    pass
-
 def WindowsError_init(s_self, *args):
     pass
 
@@ -396,8 +390,6 @@
 
 BUILTIN_ANALYZERS[getattr(OSError.__init__, 'im_func', OSError.__init__)] = (
     OSError_init)
-BUILTIN_ANALYZERS[getattr(UnicodeDecodeError.__init__, 'im_func', UnicodeDecodeError.__init__)] = UnicodeDecodeError_init
-BUILTIN_ANALYZERS[getattr(UnicodeEncodeError.__init__, 'im_func', UnicodeEncodeError.__init__)] = UnicodeEncodeError_init
 
 try:
     WindowsError

Modified: pypy/trunk/pypy/annotation/classdef.py
==============================================================================
--- pypy/trunk/pypy/annotation/classdef.py	(original)
+++ pypy/trunk/pypy/annotation/classdef.py	Wed Jun  9 14:54:29 2010
@@ -3,7 +3,7 @@
 """
 from pypy.annotation.model import SomePBC, s_ImpossibleValue, unionof
 from pypy.annotation.model import SomeInteger, isdegenerated, SomeTuple,\
-     SomeString, SomeUnicodeString
+     SomeString
 from pypy.annotation import description
 
 
@@ -439,16 +439,6 @@
 
 FORCE_ATTRIBUTES_INTO_CLASSES = {
     OSError: {'errno': SomeInteger()},
-    UnicodeDecodeError: {'end': SomeInteger(),
-                         'start': SomeInteger(),
-                         'object': SomeString(),
-                         'encoding': SomeString(),
-                         'reason': SomeString()},
-    UnicodeEncodeError: {'end': SomeInteger(),
-                         'start': SomeInteger(),
-                         'object': SomeUnicodeString(),
-                         'encoding': SomeString(),
-                         'reason': SomeString()}
     }
 
 try:

Modified: pypy/trunk/pypy/annotation/test/test_annrpython.py
==============================================================================
--- pypy/trunk/pypy/annotation/test/test_annrpython.py	(original)
+++ pypy/trunk/pypy/annotation/test/test_annrpython.py	Wed Jun  9 14:54:29 2010
@@ -3332,16 +3332,6 @@
         a = self.RPythonAnnotator()
         py.test.raises(AssertionError, a.build_types, f, [])
 
-    def test_unicode_decode_error(self):
-        def f():
-            try:
-                raise UnicodeDecodeError("x", "x", 0, 1, "reason")
-            except UnicodeDecodeError, ude:
-                return ude.end
-
-        a = self.RPythonAnnotator()
-        s = a.build_types(f, [])
-        assert isinstance(s, annmodel.SomeInteger)
 
 def g(n):
     return [0,1,2,n]

Modified: pypy/trunk/pypy/objspace/std/unicodetype.py
==============================================================================
--- pypy/trunk/pypy/objspace/std/unicodetype.py	(original)
+++ pypy/trunk/pypy/objspace/std/unicodetype.py	Wed Jun  9 14:54:29 2010
@@ -170,6 +170,32 @@
 
 # ____________________________________________________________
 
+def decode_error_handler(space):
+    def raise_unicode_exception_decode(errors, encoding, msg, s,
+                                       startingpos, endingpos):
+        raise OperationError(space.w_UnicodeDecodeError,
+                             space.newtuple([space.wrap(encoding),
+                                             space.wrap(s),
+                                             space.wrap(startingpos),
+                                             space.wrap(endingpos),
+                                             space.wrap(msg)]))
+    return raise_unicode_exception_decode
+decode_error_handler._annspecialcase_ = 'specialize:memo'
+
+def encode_error_handler(space):
+    def raise_unicode_exception_encode(errors, encoding, msg, u,
+                                       startingpos, endingpos):
+        raise OperationError(space.w_UnicodeEncodeError,
+                             space.newtuple([space.wrap(encoding),
+                                             space.wrap(u),
+                                             space.wrap(startingpos),
+                                             space.wrap(endingpos),
+                                             space.wrap(msg)]))
+    return raise_unicode_exception_encode
+encode_error_handler._annspecialcase_ = 'specialize:memo'
+
+# ____________________________________________________________
+
 def getdefaultencoding(space):
     return space.sys.defaultencoding
 
@@ -191,21 +217,16 @@
         w_encoder = space.sys.get_w_default_encoder()
     else:
         if errors is None or errors == 'strict':
-            try:
-                if encoding == 'ascii':
-                    u = space.unicode_w(w_object)
-                    return space.wrap(unicode_encode_ascii(u, len(u), None))
-                if encoding == 'utf-8':
-                    u = space.unicode_w(w_object)
-                    return space.wrap(unicode_encode_utf_8(u, len(u), None))
-            except UnicodeEncodeError, uee:
-                raise OperationError(space.w_UnicodeEncodeError,
-                                     space.newtuple([
-                                         space.wrap(uee.encoding),
-                                         space.wrap(uee.object),
-                                         space.wrap(uee.start),
-                                         space.wrap(uee.end),
-                                         space.wrap(uee.reason)]))
+            if encoding == 'ascii':
+                u = space.unicode_w(w_object)
+                eh = encode_error_handler(space)
+                return space.wrap(unicode_encode_ascii(u, len(u), None,
+                                                       errorhandler=eh))
+            if encoding == 'utf-8':
+                u = space.unicode_w(w_object)
+                eh = encode_error_handler(space)
+                return space.wrap(unicode_encode_utf_8(u, len(u), None,
+                                                       errorhandler=eh))
         from pypy.module._codecs.interp_codecs import lookup_codec
         w_encoder = space.getitem(lookup_codec(space, encoding), space.wrap(0))
     if errors is None:
@@ -224,19 +245,17 @@
     if encoding is None:
         encoding = getdefaultencoding(space)
     if errors is None or errors == 'strict':
-        try:
-            if encoding == 'ascii':
-                # XXX error handling
-                s = space.bufferstr_w(w_obj)
-                return space.wrap(str_decode_ascii(s, len(s), None)[0])
-            if encoding == 'utf-8':
-                s = space.bufferstr_w(w_obj)
-                return space.wrap(str_decode_utf_8(s, len(s), None)[0])
-        except UnicodeDecodeError, ude:
-            raise OperationError(space.w_UnicodeDecodeError, space.newtuple(
-                [space.wrap(ude.encoding), space.wrap(ude.object),
-                 space.wrap(ude.start), space.wrap(ude.end),
-                 space.wrap(ude.reason)]))
+        if encoding == 'ascii':
+            # XXX error handling
+            s = space.bufferstr_w(w_obj)
+            eh = decode_error_handler(space)
+            return space.wrap(str_decode_ascii(s, len(s), None,
+                                               errorhandler=eh)[0])
+        if encoding == 'utf-8':
+            s = space.bufferstr_w(w_obj)
+            eh = decode_error_handler(space)
+            return space.wrap(str_decode_utf_8(s, len(s), None,
+                                               errorhandler=eh)[0])
     w_codecs = space.getbuiltinmodule("_codecs")
     w_decode = space.getattr(w_codecs, space.wrap("decode"))
     if errors is None:

Modified: pypy/trunk/pypy/rlib/runicode.py
==============================================================================
--- pypy/trunk/pypy/rlib/runicode.py	(original)
+++ pypy/trunk/pypy/rlib/runicode.py	Wed Jun  9 14:54:29 2010
@@ -48,14 +48,12 @@
 def raise_unicode_exception_decode(errors, encoding, msg, s,
                                    startingpos, endingpos):
     assert isinstance(s, str)
-    raise UnicodeDecodeError(
-            encoding, s[startingpos], startingpos, endingpos, msg)
+    raise UnicodeDecodeError(encoding, s, startingpos, endingpos, msg)
 
 def raise_unicode_exception_encode(errors, encoding, msg, u,
                                    startingpos, endingpos):
     assert isinstance(u, unicode)
-    raise UnicodeEncodeError(
-            encoding, u[startingpos], startingpos, endingpos, msg)
+    raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
 
 # ____________________________________________________________ 
 # unicode decoding

Modified: pypy/trunk/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/trunk/pypy/rpython/lltypesystem/rstr.py	(original)
+++ pypy/trunk/pypy/rpython/lltypesystem/rstr.py	Wed Jun  9 14:54:29 2010
@@ -18,7 +18,6 @@
 from pypy.rpython.rmodel import Repr
 from pypy.rpython.lltypesystem import llmemory
 from pypy.tool.sourcetools import func_with_new_name
-from pypy.rpython.annlowlevel import hlstr
 
 # ____________________________________________________________
 #
@@ -297,7 +296,7 @@
         s = mallocunicode(lgt)
         for i in range(lgt):
             if ord(str.chars[i]) > 127:
-                raise UnicodeDecodeError("ascii", hlstr(str), 0, lgt, "ascii codec can't encode %d" % ord(str.chars[i]))
+                raise UnicodeDecodeError
             s.chars[i] = cast_primitive(UniChar, str.chars[i])
         return s
     ll_str2unicode.oopspec = 'str.str2unicode(str)'

Modified: pypy/trunk/pypy/rpython/rbuiltin.py
==============================================================================
--- pypy/trunk/pypy/rpython/rbuiltin.py	(original)
+++ pypy/trunk/pypy/rpython/rbuiltin.py	Wed Jun  9 14:54:29 2010
@@ -270,37 +270,6 @@
         v_errno = hop.inputarg(lltype.Signed, arg=1)
         r_self.setfield(v_self, 'errno', v_errno, hop.llops)
 
-def rtype_UnicodeDecodeError_init(hop):
-    if hop.nb_args != 6:
-        raise TypeError("UnicodeDecodeError() should be called with 5 "
-                        "arguments")
-    r_self = hop.args_r[0]
-    r_str = hop.rtyper.type_system.rstr.string_repr
-    TPS = [hop.args_r[0], r_str, r_str, lltype.Signed, lltype.Signed,
-           r_str]
-    v_self, v_encoding, v_obj, v_start, v_end, v_msg = hop.inputargs(*TPS)
-    r_self.setfield(v_self, 'encoding', v_encoding, hop.llops)
-    r_self.setfield(v_self, 'object', v_obj, hop.llops)
-    r_self.setfield(v_self, 'start', v_start, hop.llops)
-    r_self.setfield(v_self, 'end', v_end, hop.llops)
-    r_self.setfield(v_self, 'reason', v_msg, hop.llops)
-
-def rtype_UnicodeEncodeError_init(hop):
-    if hop.nb_args != 6:
-        raise TypeError("UnicodeEncodeError() should be called with 5 "
-                        "arguments")
-    r_self = hop.args_r[0]
-    r_str = hop.rtyper.type_system.rstr.string_repr
-    r_unicode = hop.rtyper.type_system.rstr.unicode_repr
-    TPS = [hop.args_r[0], r_str, r_unicode, lltype.Signed, lltype.Signed,
-           r_str]
-    v_self, v_encoding, v_obj, v_start, v_end, v_msg = hop.inputargs(*TPS)
-    r_self.setfield(v_self, 'encoding', v_encoding, hop.llops)
-    r_self.setfield(v_self, 'object', v_obj, hop.llops)
-    r_self.setfield(v_self, 'start', v_start, hop.llops)
-    r_self.setfield(v_self, 'end', v_end, hop.llops)
-    r_self.setfield(v_self, 'reason', v_msg, hop.llops)
-
 def rtype_WindowsError__init__(hop):
     if hop.nb_args == 2:
         raise TyperError("WindowsError() should not be called with "
@@ -360,8 +329,6 @@
 
 BUILTIN_TYPER[getattr(OSError.__init__, 'im_func', OSError.__init__)] = (
     rtype_OSError__init__)
-BUILTIN_TYPER[getattr(UnicodeDecodeError.__init__, 'im_func', UnicodeDecodeError.__init__)] = rtype_UnicodeDecodeError_init
-BUILTIN_TYPER[getattr(UnicodeEncodeError.__init__, 'im_func', UnicodeEncodeError.__init__)] = rtype_UnicodeEncodeError_init
 
 try:
     WindowsError

Modified: pypy/trunk/pypy/rpython/test/test_rbuiltin.py
==============================================================================
--- pypy/trunk/pypy/rpython/test/test_rbuiltin.py	(original)
+++ pypy/trunk/pypy/rpython/test/test_rbuiltin.py	Wed Jun  9 14:54:29 2010
@@ -496,35 +496,6 @@
         res = self.interpret(llf, [rffi.r_short(123)], policy=LowLevelAnnotatorPolicy())
         assert res == 123
 
-    def test_unicode_errors(self):
-        def f():
-            try:
-                raise UnicodeDecodeError("xx", "x", 0, 1, "reason")
-            except UnicodeDecodeError, ude:
-                assert ude.start == 0
-                assert ude.encoding == "xx"
-                assert ude.object == "x"
-                assert ude.start == 0
-                assert ude.reason == "reason"
-                return ude.end
-
-        res = self.interpret(f, [])
-        assert res == f()
-
-        def f():
-            try:
-                raise UnicodeEncodeError("xx", u"x", 0, 1, "reason")
-            except UnicodeEncodeError, ude:
-                assert ude.start == 0
-                assert ude.encoding == "xx"
-                assert ude.object == u"x"
-                assert ude.start == 0
-                assert ude.reason == "reason"
-                return ude.end 
-
-        res = self.interpret(f, [])
-        assert res == f()        
-
 class TestLLtype(BaseTestRbuiltin, LLRtypeMixin):
 
     def test_isinstance_obj(self):