[pypy-commit] pypy default: It is too hard to call unicode_encode_utf_8 from an LL graph while keeping the same annotations computed during normal translation. Instead, we clone a new function with func_with_new_name and call it. Also, the code is now shared between LLtype and OOtype.

Fri Aug 31 16:29:09 CEST 2012

Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: 
Changeset: r57054:f06c2ef91129
Date: 2012-08-31 14:29 +0200
http://bitbucket.org/pypy/pypy/changeset/f06c2ef91129/

Log:	It is too hard to call unicode_encode_utf_8 from an LL graph while
	keeping the same annotations computed during normal translation.
	Instead, we clone a new function with func_with_new_name and call
	it. Also, the code is now shared between LLtype and OOtype.

diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -1,7 +1,7 @@
 import sys
 from pypy.rlib.bitmanipulation import splitter
 from pypy.rpython.lltypesystem import lltype, rffi
-from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rlib.objectmodel import we_are_translated, specialize, enforceargs
 from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
 from pypy.rlib.rarithmetic import r_uint, intmask
 
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -132,9 +132,11 @@
     CACHE = CONST_STR_CACHE
 
     def __init__(self, *args):
+        from pypy.rlib.runicode import str_decode_utf_8
         AbstractStringRepr.__init__(self, *args)
         self.ll = LLHelpers
         self.malloc = mallocstr
+        self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, 'rstr_decode_utf_8')
 
     def ll_decode_latin1(self, value):
         lgt = len(value.chars)
@@ -145,10 +147,9 @@
 
     def ll_decode_utf8(self, llvalue):
         from pypy.rpython.annlowlevel import hlstr, llunicode
-        from pypy.rlib.runicode import str_decode_utf_8
         value = hlstr(llvalue)
         assert value is not None
-        univalue, _ = str_decode_utf_8(value, len(value), 'strict')
+        univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
         return llunicode(univalue)
 
 class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
@@ -158,6 +159,7 @@
     CACHE = CONST_UNICODE_CACHE
 
     def __init__(self, *args):
+        from pypy.rlib.runicode import unicode_encode_utf_8
         AbstractUnicodeRepr.__init__(self, *args)
         self.ll = LLHelpers
         self.malloc = mallocunicode
@@ -195,15 +197,6 @@
             result.chars[i] = cast_primitive(Char, c)
         return result
 
-    @jit.elidable
-    def ll_encode_utf8(self, ll_s):
-        from pypy.rpython.annlowlevel import hlunicode, llstr
-        from pypy.rlib.runicode import unicode_encode_utf_8
-        s = hlunicode(ll_s)
-        assert s is not None
-        bytes = unicode_encode_utf_8(s, len(s), 'strict')
-        return llstr(bytes)
-
 class CharRepr(AbstractCharRepr, StringRepr):
     lowleveltype = Char
 
@@ -292,6 +285,8 @@
 
 
 class LLHelpers(AbstractLLHelpers):
+    from pypy.rpython.annlowlevel import llstr
+
     @jit.elidable
     def ll_str_mul(s, times):
         if times < 0:
diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py
--- a/pypy/rpython/ootypesystem/rstr.py
+++ b/pypy/rpython/ootypesystem/rstr.py
@@ -73,6 +73,10 @@
     lowleveltype = ootype.Unicode
     basetype = basestring
 
+    def __init__(self, *args):
+        BaseOOStringRepr.__init__(self, *args)
+        AbstractUnicodeRepr.__init__(self, *args)
+
     def make_string(self, value):
         return ootype.make_unicode(value)
 
@@ -106,14 +110,6 @@
             sb.ll_append_char(cast_primitive(Char, c))
         return sb.ll_build()
 
-    def ll_encode_utf8(self, ll_s):
-        from pypy.rpython.annlowlevel import hlunicode, oostr
-        from pypy.rlib.runicode import unicode_encode_utf_8
-        s = hlunicode(ll_s)
-        assert s is not None
-        bytes = unicode_encode_utf_8(s, len(s), 'strict')
-        return oostr(bytes)
-
 class CharRepr(AbstractCharRepr, StringRepr):
     lowleveltype = Char
 
@@ -130,6 +126,8 @@
 
 class LLHelpers(AbstractLLHelpers):
 
+    from pypy.rpython.annlowlevel import oostr as llstr
+
     def ll_chr2str(ch):
         return ootype.oostring(ch, -1)
 
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -1,6 +1,8 @@
 from pypy.tool.staticmethods import StaticMethods
 from pypy.tool.pairtype import pairtype, pair
+from pypy.tool.sourcetools import func_with_new_name
 from pypy.annotation import model as annmodel
+from pypy.rlib import jit
 from pypy.rpython.error import TyperError
 from pypy.rpython.rmodel import IntegerRepr, IteratorRepr
 from pypy.rpython.rmodel import inputconst, Repr
@@ -19,12 +21,27 @@
     pass
 
 class AbstractUnicodeRepr(AbstractStringRepr):
+
+    def __init__(self, *args):
+        from pypy.rlib.runicode import unicode_encode_utf_8
+        AbstractStringRepr.__init__(self, *args)
+        self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
+                                                        'runicode_encode_utf_8')
+
     def rtype_method_upper(self, hop):
         raise TypeError("Cannot do toupper on unicode string")
 
     def rtype_method_lower(self, hop):
         raise TypeError("Cannot do tolower on unicode string")
 
+    @jit.elidable
+    def ll_encode_utf8(self, ll_s):
+        from pypy.rpython.annlowlevel import hlunicode
+        s = hlunicode(ll_s)
+        assert s is not None
+        bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+        return self.ll.llstr(bytes)
+
 class __extend__(annmodel.SomeString):
     def rtyper_makerepr(self, rtyper):
         return rtyper.type_system.rstr.string_repr
diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py
--- a/pypy/rpython/test/test_runicode.py
+++ b/pypy/rpython/test/test_runicode.py
@@ -108,6 +108,9 @@
 
     def test_utf_8_encoding_annotation(self):
         from pypy.rlib.runicode import unicode_encode_utf_8
+        def errorhandler(errors, encoding, msg, u,
+                         startingpos, endingpos):
+            raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
         def f(n):
             x = u'àèì' + unichr(n)
             if x:
@@ -115,7 +118,7 @@
             else:
                 y = u'òìàà'
             # the annotation of y is SomeUnicodeString(can_be_None=False)
-            y = unicode_encode_utf_8(y, len(y), 'strict')
+            y = unicode_encode_utf_8(y, len(y), 'strict', errorhandler)
             return x.encode('utf-8') + y
 
         assert self.ll_to_string(self.interpret(f, [38])) == f(38)