[pypy-commit] pypy rpython-utf8: add the possibility of doing x.encode('utf-8') in rpython
antocuni
noreply at buildbot.pypy.org
Thu Aug 30 16:24:57 CEST 2012
Author: Antonio Cuni <anto.cuni at gmail.com>
Branch: rpython-utf8
Changeset: r56941:c3807d8cd57a
Date: 2012-08-30 16:06 +0200
http://bitbucket.org/pypy/pypy/changeset/c3807d8cd57a/
Log: add the possibility of doing x.encode('utf-8') in rpython
diff --git a/pypy/annotation/unaryop.py b/pypy/annotation/unaryop.py
--- a/pypy/annotation/unaryop.py
+++ b/pypy/annotation/unaryop.py
@@ -530,7 +530,7 @@
if not s_enc.is_constant():
raise TypeError("Non-constant encoding not supported")
enc = s_enc.const
- if enc not in ('ascii', 'latin-1'):
+ if enc not in ('ascii', 'latin-1', 'utf-8'):
raise TypeError("Encoding %s not supported for unicode" % (enc,))
return SomeString()
method_encode.can_only_throw = [UnicodeEncodeError]
diff --git a/pypy/rpython/lltypesystem/rstr.py b/pypy/rpython/lltypesystem/rstr.py
--- a/pypy/rpython/lltypesystem/rstr.py
+++ b/pypy/rpython/lltypesystem/rstr.py
@@ -187,6 +187,14 @@
result.chars[i] = cast_primitive(Char, c)
return result
+ @jit.elidable
+ def ll_encode_utf8(self, ll_s):
+ from pypy.rpython.annlowlevel import hlunicode, llstr
+ from pypy.rlib.runicode import unicode_encode_utf_8
+ s = hlunicode(ll_s)
+ bytes = unicode_encode_utf_8(s, len(s), 'strict')
+ return llstr(bytes)
+
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
diff --git a/pypy/rpython/ootypesystem/rstr.py b/pypy/rpython/ootypesystem/rstr.py
--- a/pypy/rpython/ootypesystem/rstr.py
+++ b/pypy/rpython/ootypesystem/rstr.py
@@ -98,6 +98,13 @@
sb.ll_append_char(cast_primitive(Char, c))
return sb.ll_build()
+ def ll_encode_utf8(self, ll_s):
+ from pypy.rpython.annlowlevel import hlunicode, oostr
+ from pypy.rlib.runicode import unicode_encode_utf_8
+ s = hlunicode(ll_s)
+ bytes = unicode_encode_utf_8(s, len(s), 'strict')
+ return oostr(bytes)
+
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
diff --git a/pypy/rpython/rstr.py b/pypy/rpython/rstr.py
--- a/pypy/rpython/rstr.py
+++ b/pypy/rpython/rstr.py
@@ -340,6 +340,8 @@
return hop.gendirectcall(self.ll_str, v_self)
elif encoding == "latin-1":
return hop.gendirectcall(self.ll_encode_latin1, v_self)
+ elif encoding == 'utf-8':
+ return hop.gendirectcall(self.ll_encode_utf8, v_self)
else:
raise TyperError("encoding %s not implemented" % (encoding, ))
diff --git a/pypy/rpython/test/test_runicode.py b/pypy/rpython/test/test_runicode.py
--- a/pypy/rpython/test/test_runicode.py
+++ b/pypy/rpython/test/test_runicode.py
@@ -100,7 +100,7 @@
def test_unicode_encode(self):
def f(x):
y = u'xxx'
- return (y + unichr(x)).encode('ascii') + y.encode('latin-1')
+ return (y + unichr(x)).encode('ascii') + y.encode('latin-1') + y.encode('utf-8')
assert self.ll_to_string(self.interpret(f, [38])) == f(38)
More information about the pypy-commit mailing list