[pypy-commit] pypy utf8-unicode2: Fix MBCS codecs on Windows
waedt
noreply at buildbot.pypy.org
Sat Sep 6 22:45:09 CEST 2014
Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r73348:19502a3ecd04
Date: 2014-08-28 23:48 -0500
http://bitbucket.org/pypy/pypy/changeset/19502a3ecd04/
Log: Fix MBCS codecs on Windows
diff --git a/pypy/interpreter/test/test_utf8_codecs.py b/pypy/interpreter/test/test_utf8_codecs.py
--- a/pypy/interpreter/test/test_utf8_codecs.py
+++ b/pypy/interpreter/test/test_utf8_codecs.py
@@ -727,7 +727,7 @@
def test_mbcs_encode_force_replace(self):
if sys.platform != 'win32':
py.test.skip("mbcs encoding is win32-specific")
- u = u'@test_2224_tmp-?L??\udc80'
+ u = Utf8Str.from_unicode(u'@test_2224_tmp-?L??\udc80')
encoder = self.getencoder('mbcs')
assert encoder(u, len(u), 'strict') == '@test_2224_tmp-?L???'
py.test.raises(UnicodeEncodeError, encoder, u, len(u), 'strict',
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -9,12 +9,18 @@
wchar_rint = rffi.r_int
+wchar_ruint = rffi.r_uint
WCHAR_INTP = rffi.INTP
+WCHAR_UINTP = rffi.UINTP
WCHAR_INT = rffi.INT
+WCHAR_UINT = rffi.UINT
if rffi.sizeof(rffi.WCHAR_T) == 2:
wchar_rint = rffi.r_short
+ wchar_ruint = rffi.r_ushort
WCHAR_INTP = rffi.SHORTP
+ WCHAR_UINTP = rffi.USHORTP
WCHAR_INT = rffi.SHORT
+ WCHAR_UINT = rffi.USHORT
def utf8chr(value):
@@ -541,11 +547,11 @@
if c > 0xFFFF:
length += 1
- array = lltype.malloc(WCHAR_INTP.TO, length, flavor='raw',
+ array = lltype.malloc(WCHAR_UINTP.TO, length, flavor='raw',
track_allocation=track_allocation)
self.copy_to_wcharp(array, 0, length)
- array[length - 1] = wchar_rint(0)
+ array[length - 1] = wchar_ruint(0)
array = rffi.cast(rffi.CWCHARP, array)
return array
@@ -560,12 +566,12 @@
if rffi.sizeof(rffi.WCHAR_T) == 2:
c1, c2 = create_surrogate_pair(c)
- dst[i + dststart] = wchar_rint(c1)
+ dst[i + dststart] = wchar_ruint(c1)
if c2:
i += 1
- dst[i + dststart] = wchar_rint(c2)
+ dst[i + dststart] = wchar_ruint(c2)
else:
- dst[i + dststart] = wchar_rint(c)
+ dst[i + dststart] = wchar_ruint(c)
i += 1
@@ -574,7 +580,7 @@
@staticmethod
def from_wcharp(wcharp):
- array = rffi.cast(WCHAR_INTP, wcharp)
+ array = rffi.cast(WCHAR_UINTP, wcharp)
builder = Utf8Builder()
i = 0;
while True:
@@ -602,7 +608,7 @@
@staticmethod
def from_wcharpn(wcharp, size):
- array = rffi.cast(WCHAR_INTP, wcharp)
+ array = rffi.cast(WCHAR_UINTP, wcharp)
builder = Utf8Builder()
i = 0;
while i < size:
@@ -630,7 +636,7 @@
@staticmethod
def from_wcharpsize(wcharp, size):
- array = rffi.cast(WCHAR_INTP, wcharp)
+ array = rffi.cast(WCHAR_UINTP, wcharp)
builder = Utf8Builder()
i = 0;
while i < size:
diff --git a/pypy/interpreter/utf8_codecs.py b/pypy/interpreter/utf8_codecs.py
--- a/pypy/interpreter/utf8_codecs.py
+++ b/pypy/interpreter/utf8_codecs.py
@@ -1483,7 +1483,9 @@
if MultiByteToWideChar(CP_ACP, flags,
dataptr, size, buf.raw, usize) == 0:
_decode_mbcs_error(s, errorhandler)
- return buf.str(usize), size
+ # TODO Is this cast necessary for rpython static-typing?
+ #return Utf8Str.from_wcharp(rffi.cast(rffi.CWCHARP, buf.raw)), size
+ return Utf8Str.from_wcharpsize(buf.raw, size), size
def unicode_encode_mbcs(s, size, errors, errorhandler=None,
force_replace=True):
@@ -1507,7 +1509,7 @@
used_default_p[0] = rffi.cast(rwin32.BOOL, False)
try:
- with rffi.scoped_nonmoving_unicodebuffer(s) as dataptr:
+ with s.scoped_wcharp_copy() as dataptr:
# first get the size of the result
mbcssize = WideCharToMultiByte(CP_ACP, flags,
dataptr, size, None, 0,
More information about the pypy-commit mailing list