[pypy-svn] r48788 - in pypy/branch/ropes-unicode/pypy: annotation rpython rpython/lltypesystem rpython/ootypesystem rpython/test
cfbolz at codespeak.net
Mon Nov 19 14:51:54 CET 2007
Author: cfbolz
Date: Mon Nov 19 14:51:53 2007
New Revision: 48788
Modified:
pypy/branch/ropes-unicode/pypy/annotation/unaryop.py
pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py
pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py
pypy/branch/ropes-unicode/pypy/rpython/rstr.py
pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py
Log:
add encoding and decoding with latin-1
Modified: pypy/branch/ropes-unicode/pypy/annotation/unaryop.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/annotation/unaryop.py (original)
+++ pypy/branch/ropes-unicode/pypy/annotation/unaryop.py Mon Nov 19 14:51:53 2007
@@ -466,7 +466,7 @@
if not s_enc.is_constant():
raise TypeError("Non-constant encoding not supported")
enc = s_enc.const
- if enc != 'ascii':
+ if enc not in ('ascii', 'latin-1'):
raise TypeError("Encoding %s not supported for unicode" % (enc,))
return SomeString()
method_encode.can_only_throw = [UnicodeEncodeError]
@@ -482,7 +482,7 @@
if not s_enc.is_constant():
raise TypeError("Non-constant encoding not supported")
enc = s_enc.const
- if enc != 'ascii':
+ if enc not in ('ascii', 'latin-1'):
raise TypeError("Encoding %s not supported for strings" % (enc,))
return SomeUnicodeString()
method_decode.can_only_throw = [UnicodeDecodeError]
Modified: pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py (original)
+++ pypy/branch/ropes-unicode/pypy/rpython/lltypesystem/rstr.py Mon Nov 19 14:51:53 2007
@@ -108,6 +108,13 @@
self.ll = LLHelpers
self.malloc = mallocstr
+ def ll_decode_latin1(self, value):
+ lgt = len(value.chars)
+ s = mallocunicode(lgt)
+ for i in range(lgt):
+ s.chars[i] = cast_primitive(UniChar, value.chars[i])
+ return s
+
class UnicodeRepr(BaseLLStringRepr, AbstractUnicodeRepr):
lowleveltype = Ptr(UNICODE)
basetype = basestring
@@ -131,6 +138,17 @@
result.chars[i] = cast_primitive(Char, c)
return result
+ def ll_encode_latin1(self, s):
+ length = len(s.chars)
+ result = mallocstr(length)
+ for i in range(length):
+ c = s.chars[i]
+ if ord(c) > 255:
+ raise UnicodeEncodeError("character not in latin1 range")
+ result.chars[i] = cast_primitive(Char, c)
+ return result
+
+
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
Modified: pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py (original)
+++ pypy/branch/ropes-unicode/pypy/rpython/ootypesystem/rstr.py Mon Nov 19 14:51:53 2007
@@ -48,6 +48,16 @@
def make_string(self, value):
return ootype.make_string(value)
+ def ll_decode_latin1(self, value):
+ sb = ootype.new(ootype.UnicodeBuilder)
+ length = value.ll_strlen()
+ sb.ll_allocate(length)
+ for i in range(length):
+ c = value.ll_stritem_nonneg(i)
+ sb.ll_append_char(cast_primitive(UniChar, c))
+ return sb.ll_build()
+
+
class UnicodeRepr(BaseOOStringRepr, AbstractUnicodeRepr):
lowleveltype = ootype.Unicode
basetype = basestring
@@ -66,6 +76,17 @@
sb.ll_append_char(cast_primitive(Char, c))
return sb.ll_build()
+ def ll_encode_latin1(self, value):
+ sb = ootype.new(ootype.StringBuilder)
+ length = value.ll_strlen()
+ sb.ll_allocate(length)
+ for i in range(length):
+ c = value.ll_stritem_nonneg(i)
+ if ord(c) > 255:
+ raise UnicodeEncodeError("%d > 255, not latin-1" % ord(c))
+ sb.ll_append_char(cast_primitive(Char, c))
+ return sb.ll_build()
+
class CharRepr(AbstractCharRepr, StringRepr):
lowleveltype = Char
Modified: pypy/branch/ropes-unicode/pypy/rpython/rstr.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/rstr.py (original)
+++ pypy/branch/ropes-unicode/pypy/rpython/rstr.py Mon Nov 19 14:51:53 2007
@@ -256,9 +256,17 @@
return hop.gendirectcall(self.ll.ll_str2unicode, v_str)
def rtype_method_decode(self, hop):
- v_self = hop.inputarg(self, 0)
+ if not hop.args_s[1].is_constant():
+ raise TyperError("encoding must be a constant")
+ encoding = hop.args_s[1].const
+ v_self = hop.inputarg(self.repr, 0)
hop.exception_is_here()
- return hop.gendirectcall(self.ll.ll_str2unicode, v_self)
+ if encoding == 'ascii':
+ return hop.gendirectcall(self.ll.ll_str2unicode, v_self)
+ elif encoding == 'latin-1':
+ return hop.gendirectcall(self.ll_decode_latin1, v_self)
+ else:
+ raise TyperError("encoding %s not implemented" % (encoding, ))
def rtype_float(self, hop):
hop.has_implicit_exception(ValueError) # record that we know about it
@@ -272,9 +280,18 @@
class __extend__(AbstractUnicodeRepr):
def rtype_method_encode(self, hop):
- v_self = hop.inputarg(self, 0)
+ if not hop.args_s[1].is_constant():
+ raise TyperError("encoding must be constant")
+ encoding = hop.args_s[1].const
+ v_self = hop.inputarg(self.repr, 0)
hop.exception_is_here()
- return hop.gendirectcall(self.ll_str, v_self)
+ if encoding == "ascii":
+ return hop.gendirectcall(self.ll_str, v_self)
+ elif encoding == "latin-1":
+ return hop.gendirectcall(self.ll_encode_latin1, v_self)
+ else:
+ raise TyperError("encoding %s not implemented" % (encoding, ))
+
class __extend__(pairtype(AbstractStringRepr, Repr)):
def rtype_mod((r_str, _), hop):
Modified: pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py
==============================================================================
--- pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py (original)
+++ pypy/branch/ropes-unicode/pypy/rpython/test/test_runicode.py Mon Nov 19 14:51:53 2007
@@ -84,26 +84,37 @@
def test_unicode_encode(self):
def f(x):
y = u'xxx'
- return (y + unichr(x)).encode('ascii')
+ return (y + unichr(x)).encode('ascii') + y.encode('latin-1')
assert self.ll_to_string(self.interpret(f, [38])) == f(38)
def test_unicode_encode_error(self):
- def f(x):
- y = u'xxx'
- try:
- x = (y + unichr(x)).encode('ascii')
- return len(x)
- except UnicodeEncodeError:
- return -1
-
- assert self.interpret(f, [38]) == f(38)
- assert self.interpret(f, [138]) == f(138)
+ def f(x, which):
+ if which:
+ y = u'xxx'
+ try:
+ x = (y + unichr(x)).encode('ascii')
+ return len(x)
+ except UnicodeEncodeError:
+ return -1
+ else:
+ y = u'xxx'
+ try:
+ x = (y + unichr(x)).encode('latin-1')
+ return len(x)
+ except UnicodeEncodeError:
+ return -1
+
+ assert self.interpret(f, [38, True]) == f(38, True)
+ assert self.interpret(f, [138, True]) == f(138, True)
+ assert self.interpret(f, [38, False]) == f(38, False)
+ assert self.interpret(f, [138, False]) == f(138, False)
+ assert self.interpret(f, [300, False]) == f(300, False)
def test_unicode_decode(self):
def f(x):
y = 'xxx'
- return (y + chr(x)).decode('ascii')
+ return (y + chr(x)).decode('ascii') + chr(x).decode("latin-1")
assert self.ll_to_string(self.interpret(f, [38])) == f(38)
More information about the Pypy-commit mailing list