[pypy-commit] pypy utf8-unicode2: Fix MBCS codecs on Windows

Sat Sep 6 22:45:09 CEST 2014

Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r73348:19502a3ecd04
Date: 2014-08-28 23:48 -0500
http://bitbucket.org/pypy/pypy/changeset/19502a3ecd04/

Log:	Fix MBCS codecs on Windows

diff --git a/pypy/interpreter/test/test_utf8_codecs.py b/pypy/interpreter/test/test_utf8_codecs.py
--- a/pypy/interpreter/test/test_utf8_codecs.py
+++ b/pypy/interpreter/test/test_utf8_codecs.py
@@ -727,7 +727,7 @@
     def test_mbcs_encode_force_replace(self):
         if sys.platform != 'win32':
             py.test.skip("mbcs encoding is win32-specific")
-        u = u'@test_2224_tmp-?L??\udc80'
+        u = Utf8Str.from_unicode(u'@test_2224_tmp-?L??\udc80')
         encoder = self.getencoder('mbcs')
         assert encoder(u, len(u), 'strict') == '@test_2224_tmp-?L???'
         py.test.raises(UnicodeEncodeError, encoder, u, len(u), 'strict',
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -9,12 +9,18 @@
 
 
 wchar_rint = rffi.r_int
+wchar_ruint = rffi.r_uint
 WCHAR_INTP = rffi.INTP
+WCHAR_UINTP = rffi.UINTP
 WCHAR_INT = rffi.INT
+WCHAR_UINT = rffi.UINT
 if rffi.sizeof(rffi.WCHAR_T) == 2:
     wchar_rint = rffi.r_short
+    wchar_ruint = rffi.r_ushort
     WCHAR_INTP = rffi.SHORTP
+    WCHAR_UINTP = rffi.USHORTP
     WCHAR_INT = rffi.SHORT
+    WCHAR_UINT = rffi.USHORT
 
 
 def utf8chr(value):
@@ -541,11 +547,11 @@
                 if c > 0xFFFF:
                     length += 1
 
-        array = lltype.malloc(WCHAR_INTP.TO, length, flavor='raw',
+        array = lltype.malloc(WCHAR_UINTP.TO, length, flavor='raw',
                               track_allocation=track_allocation)
 
         self.copy_to_wcharp(array, 0, length)
-        array[length - 1] = wchar_rint(0)
+        array[length - 1] = wchar_ruint(0)
 
         array = rffi.cast(rffi.CWCHARP, array)
         return array
@@ -560,12 +566,12 @@
 
             if rffi.sizeof(rffi.WCHAR_T) == 2:
                 c1, c2 = create_surrogate_pair(c)
-                dst[i + dststart] = wchar_rint(c1)
+                dst[i + dststart] = wchar_ruint(c1)
                 if c2:
                     i += 1
-                    dst[i + dststart] = wchar_rint(c2)
+                    dst[i + dststart] = wchar_ruint(c2)
             else:
-                dst[i + dststart] = wchar_rint(c)
+                dst[i + dststart] = wchar_ruint(c)
 
             i += 1
 
@@ -574,7 +580,7 @@
 
     @staticmethod
     def from_wcharp(wcharp):
-        array = rffi.cast(WCHAR_INTP, wcharp)
+        array = rffi.cast(WCHAR_UINTP, wcharp)
         builder = Utf8Builder()
         i = 0;
         while True:
@@ -602,7 +608,7 @@
 
     @staticmethod
     def from_wcharpn(wcharp, size):
-        array = rffi.cast(WCHAR_INTP, wcharp)
+        array = rffi.cast(WCHAR_UINTP, wcharp)
         builder = Utf8Builder()
         i = 0;
         while i < size:
@@ -630,7 +636,7 @@
 
     @staticmethod
     def from_wcharpsize(wcharp, size):
-        array = rffi.cast(WCHAR_INTP, wcharp)
+        array = rffi.cast(WCHAR_UINTP, wcharp)
         builder = Utf8Builder()
         i = 0;
         while i < size:
diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py
--- a/pypy/interpreter/utf8_codecs.py
+++ b/pypy/interpreter/utf8_codecs.py
@@ -1483,7 +1483,9 @@
                 if MultiByteToWideChar(CP_ACP, flags,
                                        dataptr, size, buf.raw, usize) == 0:
                     _decode_mbcs_error(s, errorhandler)
-                return buf.str(usize), size
+                # TODO Is this cast necessary for rpython static-typing?
+                #return Utf8Str.from_wcharp(rffi.cast(rffi.CWCHARP, buf.raw)), size
+                return Utf8Str.from_wcharpsize(buf.raw, size), size
 
     def unicode_encode_mbcs(s, size, errors, errorhandler=None,
                             force_replace=True):
@@ -1507,7 +1509,7 @@
             used_default_p[0] = rffi.cast(rwin32.BOOL, False)
 
         try:
-            with rffi.scoped_nonmoving_unicodebuffer(s) as dataptr:
+            with s.scoped_wcharp_copy() as dataptr:
                 # first get the size of the result
                 mbcssize = WideCharToMultiByte(CP_ACP, flags,
                                                dataptr, size, None, 0,