[pypy-svn] r76636 - in pypy/trunk/pypy/module/cpyext: . test
dan at codespeak.net
dan at codespeak.net
Mon Aug 16 18:06:45 CEST 2010
Author: dan
Date: Mon Aug 16 18:06:44 2010
New Revision: 76636
Modified:
pypy/trunk/pypy/module/cpyext/stubs.py
pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py
pypy/trunk/pypy/module/cpyext/unicodeobject.py
Log:
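Finally committing unicode changes: implement PyUnicode_DecodeUTF16 in unicodeobject.py (removing its stub from stubs.py) and add test_decode_utf16.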
Modified: pypy/trunk/pypy/module/cpyext/stubs.py
==============================================================================
--- pypy/trunk/pypy/module/cpyext/stubs.py (original)
+++ pypy/trunk/pypy/module/cpyext/stubs.py Mon Aug 16 18:06:44 2010
@@ -2874,36 +2874,6 @@
"""
raise NotImplementedError
-@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
-def PyUnicode_DecodeUTF16(space, s, size, errors, byteorder):
- """Decode length bytes from a UTF-16 encoded buffer string and return the
- corresponding Unicode object. errors (if non-NULL) defines the error
- handling. It defaults to "strict".
-
- If byteorder is non-NULL, the decoder starts decoding using the given byte
- order:
-
- *byteorder == -1: little endian
- *byteorder == 0: native order
- *byteorder == 1: big endian
-
- If *byteorder is zero, and the first two bytes of the input data are a
- byte order mark (BOM), the decoder switches to this byte order and the BOM is
- not copied into the resulting Unicode string. If *byteorder is -1 or
- 1, any byte order mark is copied to the output (where it will result in
- either a \ufeff or a \ufffe character).
-
- After completion, *byteorder is set to the current byte order at the end
- of input data.
-
- If byteorder is NULL, the codec starts in native order mode.
-
- Return NULL if an exception was raised by the codec.
-
- This function used an int type for size. This might require
- changes in your code for properly supporting 64-bit systems."""
- raise NotImplementedError
-
@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject)
def PyUnicode_DecodeUTF16Stateful(space, s, size, errors, byteorder, consumed):
"""If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If
Modified: pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py
==============================================================================
--- pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py (original)
+++ pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py Mon Aug 16 18:06:44 2010
@@ -172,4 +172,37 @@
result = api.PyUnicode_AsASCIIString(w_ustr)
assert result is None
+ def test_decode_utf16(self, space, api):
+ def test(encoded, endian, realendian=None):
+ encoded_charp = rffi.str2charp(encoded)
+ strict_charp = rffi.str2charp("strict")
+ if endian is not None:
+ pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw')
+ if endian < 0:
+ pendian[0] = -1
+ elif endian > 0:
+ pendian[0] = 1
+ else:
+ pendian[0] = 0
+ else:
+ pendian = None
+ w_ustr = api.PyUnicode_DecodeUTF16(encoded_charp, len(encoded), strict_charp, pendian)
+ assert space.eq_w(space.call_method(w_ustr, 'encode', space.wrap('ascii')),
+ space.wrap("abcd"))
+
+ rffi.free_charp(encoded_charp)
+ rffi.free_charp(strict_charp)
+ if pendian:
+ if realendian is not None:
+ assert rffi.cast(rffi.INT, realendian) == pendian[0]
+ lltype.free(pendian, flavor='raw')
+
+ test("\x61\x00\x62\x00\x63\x00\x64\x00", -1)
+
+ test("\x61\x00\x62\x00\x63\x00\x64\x00", None)
+
+ test("\x00\x61\x00\x62\x00\x63\x00\x64", 1)
+
+ test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1)
+ test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1)
Modified: pypy/trunk/pypy/module/cpyext/unicodeobject.py
==============================================================================
--- pypy/trunk/pypy/module/cpyext/unicodeobject.py (original)
+++ pypy/trunk/pypy/module/cpyext/unicodeobject.py Mon Aug 16 18:06:44 2010
@@ -9,6 +9,7 @@
from pypy.module.cpyext.pyobject import PyObject, from_ref, make_typedescr
from pypy.module.sys.interp_encoding import setdefaultencoding
from pypy.objspace.std import unicodeobject, unicodetype
+from pypy.rlib import runicode
import sys
## See comment in stringobject.py. PyUnicode_FromUnicode(NULL, size) is not
@@ -307,6 +308,64 @@
w_errors = space.w_None
return space.call_method(w_str, 'decode', space.wrap("utf-8"), w_errors)
+@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
+def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder):
+ """Decode length bytes from a UTF-16 encoded buffer string and return the
+ corresponding Unicode object. errors (if non-NULL) defines the error
+ handling. It defaults to "strict".
+
+ If byteorder is non-NULL, the decoder starts decoding using the given byte
+ order:
+
+ *byteorder == -1: little endian
+ *byteorder == 0: native order
+ *byteorder == 1: big endian
+
+ If *byteorder is zero, and the first two bytes of the input data are a
+ byte order mark (BOM), the decoder switches to this byte order and the BOM is
+ not copied into the resulting Unicode string. If *byteorder is -1 or
+ 1, any byte order mark is copied to the output (where it will result in
+ either a \ufeff or a \ufffe character).
+
+ After completion, *byteorder is set to the current byte order at the end
+ of input data.
+
+ If byteorder is NULL, the codec starts in native order mode.
+
+ Return NULL if an exception was raised by the codec.
+
+ This function used an int type for size. This might require
+ changes in your code for properly supporting 64-bit systems."""
+
+ string = rffi.charpsize2str(s, size)
+
+ #FIXME: I don't like these prefixes
+ if pbyteorder is not None: # correct NULL check?
+ llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) # compatible with int?
+ if llbyteorder < 0:
+ byteorder = "little"
+ elif llbyteorder > 0:
+ byteorder = "big"
+ else:
+ byteorder = "native"
+ else:
+ byteorder = "native"
+
+ if llerrors:
+ errors = rffi.charp2str(llerrors)
+ else:
+ errors = None
+
+ result, length, byteorder = runicode.str_decode_utf_16_helper(string, size,
+ errors,
+ True, # final ? false for multiple passes?
+ None, # errorhandler
+ byteorder)
+ if pbyteorder is not None:
+ pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
+
+ return space.wrap(result)
+
@cpython_api([PyObject], PyObject)
def PyUnicode_AsASCIIString(space, w_unicode):
"""Encode a Unicode object using ASCII and return the result as Python string
More information about the Pypy-commit
mailing list