[pypy-svn] r76636 - in pypy/trunk/pypy/module/cpyext: . test

Mon Aug 16 18:06:45 CEST 2010

Author: dan
Date: Mon Aug 16 18:06:44 2010
New Revision: 76636

Modified:
   pypy/trunk/pypy/module/cpyext/stubs.py
   pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py
   pypy/trunk/pypy/module/cpyext/unicodeobject.py
Log:
Finally committing unicode changes.

Modified: pypy/trunk/pypy/module/cpyext/stubs.py
==============================================================================

--- pypy/trunk/pypy/module/cpyext/stubs.py	(original)
+++ pypy/trunk/pypy/module/cpyext/stubs.py	Mon Aug 16 18:06:44 2010
@@ -2874,36 +2874,6 @@
     """
     raise NotImplementedError
 
-@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
-def PyUnicode_DecodeUTF16(space, s, size, errors, byteorder):
-    """Decode length bytes from a UTF-16 encoded buffer string and return the
-    corresponding Unicode object.  errors (if non-NULL) defines the error
-    handling. It defaults to "strict".
-    
-    If byteorder is non-NULL, the decoder starts decoding using the given byte
-    order:
-    
-    *byteorder == -1: little endian
-    *byteorder == 0:  native order
-    *byteorder == 1:  big endian
-    
-    If *byteorder is zero, and the first two bytes of the input data are a
-    byte order mark (BOM), the decoder switches to this byte order and the BOM is
-    not copied into the resulting Unicode string.  If *byteorder is -1 or
-    1, any byte order mark is copied to the output (where it will result in
-    either a \ufeff or a \ufffe character).
-    
-    After completion, *byteorder is set to the current byte order at the end
-    of input data.
-    
-    If byteorder is NULL, the codec starts in native order mode.
-    
-    Return NULL if an exception was raised by the codec.
-    
-    This function used an int type for size. This might require
-    changes in your code for properly supporting 64-bit systems."""
-    raise NotImplementedError
-
 @cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP, Py_ssize_t], PyObject)
 def PyUnicode_DecodeUTF16Stateful(space, s, size, errors, byteorder, consumed):
     """If consumed is NULL, behave like PyUnicode_DecodeUTF16(). If

Modified: pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py
==============================================================================
--- pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py	(original)
+++ pypy/trunk/pypy/module/cpyext/test/test_unicodeobject.py	Mon Aug 16 18:06:44 2010
@@ -172,4 +172,37 @@
         result = api.PyUnicode_AsASCIIString(w_ustr)
         assert result is None
 
+    def test_decode_utf16(self, space, api):
+        def test(encoded, endian, realendian=None):
+            encoded_charp = rffi.str2charp(encoded)
+            strict_charp = rffi.str2charp("strict")
+            if endian is not None:
+                pendian = lltype.malloc(rffi.INTP.TO, 1, flavor='raw')
+                if endian < 0:
+                    pendian[0] = -1
+                elif endian > 0:
+                    pendian[0] = 1
+                else:
+                    pendian[0] = 0
+            else:
+                pendian = None
 
+            w_ustr = api.PyUnicode_DecodeUTF16(encoded_charp, len(encoded), strict_charp, pendian)
+            assert space.eq_w(space.call_method(w_ustr, 'encode', space.wrap('ascii')),
+                              space.wrap("abcd"))
+
+            rffi.free_charp(encoded_charp)
+            rffi.free_charp(strict_charp)
+            if pendian:
+                if realendian is not None:
+                    assert rffi.cast(rffi.INT, realendian) == pendian[0]
+                lltype.free(pendian, flavor='raw')
+
+        test("\x61\x00\x62\x00\x63\x00\x64\x00", -1)
+
+        test("\x61\x00\x62\x00\x63\x00\x64\x00", None)
+
+        test("\x00\x61\x00\x62\x00\x63\x00\x64", 1)
+
+        test("\xFE\xFF\x00\x61\x00\x62\x00\x63\x00\x64", 0, 1)
+        test("\xFF\xFE\x61\x00\x62\x00\x63\x00\x64\x00", 0, -1)

Modified: pypy/trunk/pypy/module/cpyext/unicodeobject.py
==============================================================================
--- pypy/trunk/pypy/module/cpyext/unicodeobject.py	(original)
+++ pypy/trunk/pypy/module/cpyext/unicodeobject.py	Mon Aug 16 18:06:44 2010
@@ -9,6 +9,7 @@
 from pypy.module.cpyext.pyobject import PyObject, from_ref, make_typedescr
 from pypy.module.sys.interp_encoding import setdefaultencoding
 from pypy.objspace.std import unicodeobject, unicodetype
+from pypy.rlib import runicode
 import sys
 
 ## See comment in stringobject.py.  PyUnicode_FromUnicode(NULL, size) is not
@@ -307,6 +308,64 @@
         w_errors = space.w_None
     return space.call_method(w_str, 'decode', space.wrap("utf-8"), w_errors)
 
+@cpython_api([rffi.CCHARP, Py_ssize_t, rffi.CCHARP, rffi.INTP], PyObject)
+def PyUnicode_DecodeUTF16(space, s, size, llerrors, pbyteorder):
+    """Decode length bytes from a UTF-16 encoded buffer string and return the
+    corresponding Unicode object.  errors (if non-NULL) defines the error
+    handling. It defaults to "strict".
+    
+    If byteorder is non-NULL, the decoder starts decoding using the given byte
+    order:
+    
+    *byteorder == -1: little endian
+    *byteorder == 0:  native order
+    *byteorder == 1:  big endian
+    
+    If *byteorder is zero, and the first two bytes of the input data are a
+    byte order mark (BOM), the decoder switches to this byte order and the BOM is
+    not copied into the resulting Unicode string.  If *byteorder is -1 or
+    1, any byte order mark is copied to the output (where it will result in
+    either a \ufeff or a \ufffe character).
+    
+    After completion, *byteorder is set to the current byte order at the end
+    of input data.
+    
+    If byteorder is NULL, the codec starts in native order mode.
+    
+    Return NULL if an exception was raised by the codec.
+    
+    This function used an int type for size. This might require
+    changes in your code for properly supporting 64-bit systems."""
+
+    string = rffi.charpsize2str(s, size)
+
+    #FIXME: I don't like these prefixes
+    if pbyteorder is not None: # correct NULL check?
+        llbyteorder = rffi.cast(lltype.Signed, pbyteorder[0]) # compatible with int?
+        if llbyteorder < 0:
+            byteorder = "little"
+        elif llbyteorder > 0:
+            byteorder = "big"
+        else:
+            byteorder = "native"
+    else:
+        byteorder = "native"
+
+    if llerrors:
+        errors = rffi.charp2str(llerrors)
+    else:
+        errors = None
+
+    result, length, byteorder = runicode.str_decode_utf_16_helper(string, size,
+                                           errors,
+                                           True, # final ? false for multiple passes?
+                                           None, # errorhandler
+                                           byteorder)
+    if pbyteorder is not None:
+        pbyteorder[0] = rffi.cast(rffi.INT, byteorder)
+
+    return space.wrap(result)
+
 @cpython_api([PyObject], PyObject)
 def PyUnicode_AsASCIIString(space, w_unicode):
     """Encode a Unicode object using ASCII and return the result as Python string