[pypy-commit] pypy default: it is too hard to call unicode_encode_utf_8 from a LL graph while keeping the same annotations computed during normal translation. Instead, we clone a new function with func_with_new_name and we call it. Also, we share the code between LLtype and OOtype now
antocuni
noreply at buildbot.pypy.org
Fri Aug 31 16:29:09 CEST 2012
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch:
Changeset: r57054:f06c2ef91129
Date: 2012-08-31 14:29 +0200
http://bitbucket.org/pypy/pypy/changeset/f06c2ef91129/
Log: It is too hard to call unicode_encode_utf_8 from an LL graph while
keeping the same annotations computed during normal translation.
Instead, we clone a new function with func_with_new_name and call
it. Also, the code is now shared between LLtype and OOtype.
diff --git a/pypy/rlib/runicode.py b/pypy/rlib/runicode.py
--- a/pypy/rlib/runicode.py
+++ b/pypy/rlib/runicode.py
@@ -1,7 +1,7 @@
import sys
from pypy.rlib.bitmanipulation import splitter
from pypy.rpython.lltypesystem import lltype, rffi
-from pypy.rlib.objectmodel import we_are_translated, specialize
+from pypy.rlib.objectmodel import we_are_translated, specialize, enforceargs
from pypy.rlib.rstring import StringBuilder, UnicodeBuilder
from pypy.rlib.rarithmetic import r_uint, intmask
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -132,9 +132,11 @@
CACHE = CONST_STR_CACHE
def __init__(self, *args):
+ from pypy.rlib.runicode import str_decode_utf_8
AbstractStringRepr.__init__(self, *args)
self.ll = LLHelpers
self.malloc = mallocstr
+ self.rstr_decode_utf_8 = func_with_new_name(str_decode_utf_8, 'rstr_decode_utf_8')
def ll_decode_latin1(self, value):
lgt = len(value.chars)
@@ -145,10 +147,9 @@
def ll_decode_utf8(self, llvalue):
from pypy.rpython.annlowlevel import hlstr, llunicode
- from pypy.rlib.runicode import str_decode_utf_8
value = hlstr(llvalue)
assert value is not None
- univalue, _ = str_decode_utf_8(value, len(value), 'strict')
+ univalue, _ = self.rstr_decode_utf_8(value, len(value), 'strict')
return llunicode(univalue)
class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
@@ -158,6 +159,7 @@
CACHE = CONST_UNICODE_CACHE
def __init__(self, *args):
+ from pypy.rlib.runicode import unicode_encode_utf_8
AbstractUnicodeRepr.__init__(self, *args)
self.ll = LLHelpers
self.malloc = mallocunicode
@@ -195,15 +197,6 @@
result.chars[i] = cast_primitive(Char, c)
return result
- @jit.elidable
- def ll_encode_utf8(self, ll_s):
- from pypy.rpython.annlowlevel import hlunicode, llstr
- from pypy.rlib.runicode import unicode_encode_utf_8
- s = hlunicode(ll_s)
- assert s is not None
- bytes = unicode_encode_utf_8(s, len(s), 'strict')
- return llstr(bytes)
-
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
@@ -292,6 +285,8 @@
class LLHelpers(AbstractLLHelpers):
+ from pypy.rpython.annlowlevel import llstr
+
@jit.elidable
def ll_str_mul(s, times):
if times < 0:
diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py
--- a/pypy/rpython/ootypesystem/rstr.py
+++ b/pypy/rpython/ootypesystem/rstr.py
@@ -73,6 +73,10 @@
lowleveltype = ootype.Unicode
basetype = basestring
+ def __init__(self, *args):
+ BaseOOStringRepr.__init__(self, *args)
+ AbstractUnicodeRepr.__init__(self, *args)
+
def make_string(self, value):
return ootype.make_unicode(value)
@@ -106,14 +110,6 @@
sb.ll_append_char(cast_primitive(Char, c))
return sb.ll_build()
- def ll_encode_utf8(self, ll_s):
- from pypy.rpython.annlowlevel import hlunicode, oostr
- from pypy.rlib.runicode import unicode_encode_utf_8
- s = hlunicode(ll_s)
- assert s is not None
- bytes = unicode_encode_utf_8(s, len(s), 'strict')
- return oostr(bytes)
-
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
@@ -130,6 +126,8 @@
class LLHelpers(AbstractLLHelpers):
+ from pypy.rpython.annlowlevel import oostr as llstr
+
def ll_chr2str(ch):
return ootype.oostring(ch, -1)
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -1,6 +1,8 @@
from pypy.tool.staticmethods import StaticMethods
from pypy.tool.pairtype import pairtype, pair
+from pypy.tool.sourcetools import func_with_new_name
from pypy.annotation import model as annmodel
+from pypy.rlib import jit
from pypy.rpython.error import TyperError
from pypy.rpython.rmodel import IntegerRepr, IteratorRepr
from pypy.rpython.rmodel import inputconst, Repr
@@ -19,12 +21,27 @@
pass
class AbstractUnicodeRepr(AbstractStringRepr):
+
+ def __init__(self, *args):
+ from pypy.rlib.runicode import unicode_encode_utf_8
+ AbstractStringRepr.__init__(self, *args)
+ self.runicode_encode_utf_8 = func_with_new_name(unicode_encode_utf_8,
+ 'runicode_encode_utf_8')
+
def rtype_method_upper(self, hop):
raise TypeError("Cannot do toupper on unicode string")
def rtype_method_lower(self, hop):
raise TypeError("Cannot do tolower on unicode string")
+ @jit.elidable
+ def ll_encode_utf8(self, ll_s):
+ from pypy.rpython.annlowlevel import hlunicode
+ s = hlunicode(ll_s)
+ assert s is not None
+ bytes = self.runicode_encode_utf_8(s, len(s), 'strict')
+ return self.ll.llstr(bytes)
+
class __extend__(annmodel.SomeString):
def rtyper_makerepr(self, rtyper):
return rtyper.type_system.rstr.string_repr
diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py
--- a/pypy/rpython/test/test_runicode.py
+++ b/pypy/rpython/test/test_runicode.py
@@ -108,6 +108,9 @@
def test_utf_8_encoding_annotation(self):
from pypy.rlib.runicode import unicode_encode_utf_8
+ def errorhandler(errors, encoding, msg, u,
+ startingpos, endingpos):
+ raise UnicodeEncodeError(encoding, u, startingpos, endingpos, msg)
def f(n):
x = u'àèì' + unichr(n)
if x:
@@ -115,7 +118,7 @@
else:
y = u'òìàà'
# the annotation of y is SomeUnicodeString(can_be_None=False)
- y = unicode_encode_utf_8(y, len(y), 'strict')
+ y = unicode_encode_utf_8(y, len(y), 'strict', errorhandler)
return x.encode('utf-8') + y
assert self.ll_to_string(self.interpret(f, [38])) == f(38)
More information about the pypy-commit
mailing list