[pypy-svn] r48788 - in pypy/branch/ropes-unicode/pypy: annotation rpython rpython/lltypesystem rpython/ootypesystem rpython/test

Mon Nov 19 14:51:54 CET 2007

Author: cfbolz
Date: Mon Nov 19 14:51:53 2007
New Revision: 48788

Modified:
   pypy/branch/ropes-unicode/pypy/annotation/unaryop.py
   pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py
   pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py
   pypy/branch/ropes-unicode/pypy/rpython/rstr.py
   pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py
Log:
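Add encoding and decoding with latin-1: unicode.encode('latin-1') and str.decode('latin-1') are now accepted by the annotator and implemented in the rtyper for both the lltype and ootype type systems, with tests.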


Modified: pypy/branch/ropes-unicode/pypy/annotation/unaryop.py
==============================================================================

--- pypy/branch/ropes-unicode/pypy/annotation/unaryop.py	(original)
+++ pypy/branch/ropes-unicode/pypy/annotation/unaryop.py	Mon Nov 19 14:51:53 2007
@@ -466,7 +466,7 @@
         if not s_enc.is_constant():
             raise TypeError("Non-constant encoding not supported")
         enc = s_enc.const
-        if enc != 'ascii':
+        if enc not in ('ascii', 'latin-1'):
             raise TypeError("Encoding %s not supported for unicode" % (enc,))
         return SomeString()
     method_encode.can_only_throw = [UnicodeEncodeError]
@@ -482,7 +482,7 @@
         if not s_enc.is_constant():
             raise TypeError("Non-constant encoding not supported")
         enc = s_enc.const
-        if enc != 'ascii':
+        if enc not in ('ascii', 'latin-1'):
             raise TypeError("Encoding %s not supported for strings" % (enc,))
         return SomeUnicodeString()
     method_decode.can_only_throw = [UnicodeDecodeError]

Modified: pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py	(original)
+++ pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py	Mon Nov 19 14:51:53 2007
@@ -108,6 +108,13 @@
         self.ll = LLHelpers
         self.malloc = mallocstr
     
+    def ll_decode_latin1(self, value):
+        lgt = len(value.chars)
+        s = mallocunicode(lgt)
+        for i in range(lgt):
+            s.chars[i] = cast_primitive(UniChar, value.chars[i])
+        return s
+
 class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
     lowleveltype = Ptr(UNICODE)
     basetype = basestring
@@ -131,6 +138,17 @@
             result.chars[i] = cast_primitive(Char, c)
         return result
 
+    def ll_encode_latin1(self, s):
+        length = len(s.chars)
+        result = mallocstr(length)
+        for i in range(length):
+            c = s.chars[i]
+            if ord(c) > 255:
+                raise UnicodeEncodeError("character not in latin1 range")
+            result.chars[i] = cast_primitive(Char, c)
+        return result
+
+
 class CharRepr(AbstractCharRepr, StringRepr):
     lowleveltype = Char
 

Modified: pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py	(original)
+++ pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py	Mon Nov 19 14:51:53 2007
@@ -48,6 +48,16 @@
     def make_string(self, value):
         return ootype.make_string(value)
 
+    def ll_decode_latin1(self, value):
+        sb = ootype.new(ootype.UnicodeBuilder)
+        length = value.ll_strlen()
+        sb.ll_allocate(length)
+        for i in range(length):
+            c = value.ll_stritem_nonneg(i)
+            sb.ll_append_char(cast_primitive(UniChar, c))
+        return sb.ll_build()
+
+
 class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
     lowleveltype = ootype.Unicode
     basetype = basestring
@@ -66,6 +76,17 @@
             sb.ll_append_char(cast_primitive(Char, c))
         return sb.ll_build()
 
+    def ll_encode_latin1(self, value):
+        sb = ootype.new(ootype.StringBuilder)
+        length = value.ll_strlen()
+        sb.ll_allocate(length)
+        for i in range(length):
+            c = value.ll_stritem_nonneg(i)
+            if ord(c) > 255:
+                raise UnicodeEncodeError("%d > 255, not latin-1" % ord(c))
+            sb.ll_append_char(cast_primitive(Char, c))
+        return sb.ll_build()
+
 class CharRepr(AbstractCharRepr, StringRepr):
     lowleveltype = Char
 

Modified: pypy/branch/ropes-unicode/pypy/rpython/rstr.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/rstr.py	(original)
+++ pypy/branch/ropes-unicode/pypy/rpython/rstr.py	Mon Nov 19 14:51:53 2007
@@ -256,9 +256,17 @@
         return hop.gendirectcall(self.ll.ll_str2unicode, v_str)
 
     def rtype_method_decode(self, hop):
-        v_self = hop.inputarg(self, 0)
+        if not hop.args_s[1].is_constant():
+            raise TyperError("encoding must be a constant")
+        encoding = hop.args_s[1].const
+        v_self = hop.inputarg(self.repr, 0)
         hop.exception_is_here()
-        return hop.gendirectcall(self.ll.ll_str2unicode, v_self)
+        if encoding == 'ascii':
+            return hop.gendirectcall(self.ll.ll_str2unicode, v_self)
+        elif encoding == 'latin-1':
+            return hop.gendirectcall(self.ll_decode_latin1, v_self)
+        else:
+            raise TyperError("encoding %s not implemented" % (encoding, ))
 
     def rtype_float(self, hop):
         hop.has_implicit_exception(ValueError)   # record that we know about it
@@ -272,9 +280,18 @@
 
 class __extend__(AbstractUnicodeRepr):
     def rtype_method_encode(self, hop):
-        v_self = hop.inputarg(self, 0)
+        if not hop.args_s[1].is_constant():
+            raise TyperError("encoding must be constant")
+        encoding = hop.args_s[1].const
+        v_self = hop.inputarg(self.repr, 0)
         hop.exception_is_here()
-        return hop.gendirectcall(self.ll_str, v_self)
+        if encoding == "ascii":
+            return hop.gendirectcall(self.ll_str, v_self)
+        elif encoding == "latin-1":
+            return hop.gendirectcall(self.ll_encode_latin1, v_self)
+        else:
+            raise TyperError("encoding %s not implemented" % (encoding, ))
+
 
 class __extend__(pairtype(AbstractStringRepr, Repr)):
     def rtype_mod((r_str, _), hop):

Modified: pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py	(original)
+++ pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py	Mon Nov 19 14:51:53 2007
@@ -84,26 +84,37 @@
     def test_unicode_encode(self):
         def f(x):
             y = u'xxx'
-            return (y + unichr(x)).encode('ascii')
+            return (y + unichr(x)).encode('ascii') + y.encode('latin-1')
 
         assert self.ll_to_string(self.interpret(f, [38])) == f(38)
 
     def test_unicode_encode_error(self):
-        def f(x):
-            y = u'xxx'
-            try:
-                x = (y + unichr(x)).encode('ascii')
-                return len(x)
-            except UnicodeEncodeError:
-                return -1
-
-        assert self.interpret(f, [38]) == f(38)
-        assert self.interpret(f, [138]) == f(138)
+        def f(x, which):
+            if which:
+                y = u'xxx'
+                try:
+                    x = (y + unichr(x)).encode('ascii')
+                    return len(x)
+                except UnicodeEncodeError:
+                    return -1
+            else:
+                y = u'xxx'
+                try:
+                    x = (y + unichr(x)).encode('latin-1')
+                    return len(x)
+                except UnicodeEncodeError:
+                    return -1
+
+        assert self.interpret(f, [38, True]) == f(38, True)
+        assert self.interpret(f, [138, True]) == f(138, True)
+        assert self.interpret(f, [38, False]) == f(38, False)
+        assert self.interpret(f, [138, False]) == f(138, False)
+        assert self.interpret(f, [300, False]) == f(300, False)
 
     def test_unicode_decode(self):
         def f(x):
             y = 'xxx'
-            return (y + chr(x)).decode('ascii')
+            return (y + chr(x)).decode('ascii') + chr(x).decode("latin-1") 
 
         assert self.ll_to_string(self.interpret(f, [38])) == f(38)