[pypy-commit] pypy utf8-unicode2: Fix _cffi_backend

Sat Jul 19 14:42:00 CEST 2014

Author: Tyler Wade <wayedt at gmail.com>
Branch: utf8-unicode2
Changeset: r72469:fbbabe9aebd1
Date: 2014-07-17 05:23 -0500
http://bitbucket.org/pypy/pypy/changeset/fbbabe9aebd1/

Log:	Fix _cffi_backend

diff --git a/pypy/interpreter/test/test_utf8.py b/pypy/interpreter/test/test_utf8.py
--- a/pypy/interpreter/test/test_utf8.py
+++ b/pypy/interpreter/test/test_utf8.py
@@ -195,18 +195,21 @@
     assert s.rsplit(' ', 2) == u.rsplit(' ', 2)
     assert s.rsplit('\n') == [s]
 
-def test_copy_to_wcharp():
+def test_copy_to_new_wcharp():
     s = build_utf8str()
     if sys.maxunicode < 0x10000 and rffi.sizeof(rffi.WCHAR_T) == 4:
         # The last character requires a surrogate pair on narrow builds and
         # so won't be converted correctly by rffi.wcharp2unicode
         s = s[:-1]
 
-    wcharp = s.copy_to_wcharp()
+    wcharp = s.copy_to_new_wcharp()
     u = rffi.wcharp2unicode(wcharp)
     rffi.free_wcharp(wcharp)
     assert s == u
 
+    with s.scoped_wcharp_copy() as scoped:  # check the managed copy itself
+        assert s == rffi.wcharp2unicode(scoped)
+
 def test_from_wcharp():
     def check(u):
         wcharp = rffi.unicode2wcharp(u)
diff --git a/pypy/interpreter/utf8.py b/pypy/interpreter/utf8.py
--- a/pypy/interpreter/utf8.py
+++ b/pypy/interpreter/utf8.py
@@ -422,7 +422,7 @@
             byte_pos -= 1
         return byte_pos
 
-    def copy_to_wcharp(self, track_allocation=True):
+    def copy_to_new_wcharp(self, track_allocation=True):
         length = len(self) + 1
         if rffi.sizeof(rffi.WCHAR_T) == 2:
             for c in self.codepoint_iter():
@@ -431,24 +431,34 @@
 
         array = lltype.malloc(WCHAR_INTP.TO, length, flavor='raw',
                               track_allocation=track_allocation)
+
+        self.copy_to_wcharp(array, 0, length)
+        array[length - 1] = wchar_rint(0)
+
+        array = rffi.cast(rffi.CWCHARP, array)
+        return array
+
+    def copy_to_wcharp(self, dst, dststart, length):  # fills dst in place, returns nothing
         from pypy.interpreter.utf8_codecs import create_surrogate_pair
 
         i = 0;
         for c in self.codepoint_iter():
+            # A surrogate pair advances i by 2, so `==` could be stepped over.
+            if i >= length:
+                break
             if rffi.sizeof(rffi.WCHAR_T) == 2:
                 c1, c2 = create_surrogate_pair(c)
-                array[i] = wchar_rint(c1)
+                dst[i + dststart] = wchar_rint(c1)
                 if c2:
                     i += 1
-                    array[i] = wchar_rint(c2)
+                    dst[i + dststart] = wchar_rint(c2)  # NOTE(review): can land at index == length
             else:
-                array[i] = wchar_rint(c)
+                dst[i + dststart] = wchar_rint(c)
 
             i += 1
 
-        array[i] = wchar_rint(0)
-        array = rffi.cast(rffi.CWCHARP, array)
-        return array
+    def scoped_wcharp_copy(self):  # for `with`: yields a wchar copy, freed on exit
+        return WCharContextManager(self)
 
     @staticmethod
     def from_wcharp(wcharp):
@@ -600,6 +610,15 @@
     def build(self):
         return Utf8Str(self._builder.build(), self._is_ascii)
 
+class WCharContextManager(object):  # `with` helper owning a temporary wchar copy
+    def __init__(self, str):  # str: object providing copy_to_new_wcharp()
+        self.str = str
+    def __enter__(self):
+        self.data = self.str.copy_to_new_wcharp()  # new buffer, released in __exit__
+        return self.data
+    def __exit__(self, *args):  # exception details are ignored; nothing is suppressed
+        rffi.free_wcharp(self.data)
+
 # _______________________________________________
 
 # iter.current is the current (ie the last returned) element
diff --git a/pypy/module/__pypy__/interp_builders.py b/pypy/module/__pypy__/interp_builders.py
--- a/pypy/module/__pypy__/interp_builders.py
+++ b/pypy/module/__pypy__/interp_builders.py
@@ -2,7 +2,8 @@
 from pypy.interpreter.error import OperationError
 from pypy.interpreter.gateway import interp2app, unwrap_spec
 from pypy.interpreter.typedef import TypeDef
-from rpython.rlib.rstring import UnicodeBuilder, StringBuilder
+from pypy.interpreter.utf8 import Utf8Builder
+from rpython.rlib.rstring import StringBuilder
 from rpython.tool.sourcetools import func_with_new_name
 
 
@@ -62,4 +63,4 @@
     return W_Builder
 
 W_StringBuilder = create_builder("StringBuilder", str, StringBuilder)
-W_UnicodeBuilder = create_builder("UnicodeBuilder", unicode, UnicodeBuilder)
+W_UnicodeBuilder = create_builder("UnicodeBuilder", unicode, Utf8Builder)
diff --git a/pypy/module/_cffi_backend/ctypeprim.py b/pypy/module/_cffi_backend/ctypeprim.py
--- a/pypy/module/_cffi_backend/ctypeprim.py
+++ b/pypy/module/_cffi_backend/ctypeprim.py
@@ -9,7 +9,9 @@
 from rpython.rlib import jit
 from rpython.rtyper.lltypesystem import lltype, rffi
 
+from pypy.interpreter import utf8
 from pypy.interpreter.error import oefmt
+from pypy.interpreter.utf8 import Utf8Str, utf8ord
 from pypy.module._cffi_backend import cdataobj, misc
 from pypy.module._cffi_backend.ctypeobj import W_CType
 
@@ -46,7 +48,7 @@
             raise oefmt(space.w_TypeError,
                         "cannot cast unicode string of length %d to ctype '%s'",
                         len(s), self.name)
-        return ord(s[0])
+        return utf8ord(s)
 
     def cast(self, w_ob):
         from pypy.module._cffi_backend import ctypeptr
@@ -128,12 +130,12 @@
     _attrs_ = []
 
     def cast_to_int(self, cdata):
-        unichardata = rffi.cast(rffi.CWCHARP, cdata)
-        return self.space.wrap(ord(unichardata[0]))
+        unichardata = rffi.cast(utf8.WCHAR_INTP, cdata)  # integer view, so no ord() needed
+        return self.space.wrap(intmask(unichardata[0]))
 
     def convert_to_object(self, cdata):
         unichardata = rffi.cast(rffi.CWCHARP, cdata)
-        s = rffi.wcharpsize2unicode(unichardata, 1)
+        s = Utf8Str.from_wcharpsize(unichardata, 1)  # presumably reads one wchar_t -- TODO confirm
         return self.space.wrap(s)
 
     def string(self, cdataobj, maxlen):
@@ -154,7 +156,7 @@
 
     def convert_from_object(self, cdata, w_ob):
         value = self._convert_to_unichar(w_ob)
-        rffi.cast(rffi.CWCHARP, cdata)[0] = value
+        rffi.cast(utf8.WCHAR_INTP, cdata)[0] = utf8.wchar_rint(utf8ord(value))  # stored as an integer
 
 
 class W_CTypePrimitiveSigned(W_CTypePrimitive):
diff --git a/pypy/module/_cffi_backend/ctypeptr.py b/pypy/module/_cffi_backend/ctypeptr.py
--- a/pypy/module/_cffi_backend/ctypeptr.py
+++ b/pypy/module/_cffi_backend/ctypeptr.py
@@ -9,6 +9,8 @@
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rtyper.lltypesystem.rstr import copy_string_to_raw, copy_unicode_to_raw
 
+from pypy.interpreter import utf8
+from pypy.interpreter.utf8 import Utf8Str
 from pypy.interpreter.error import OperationError, oefmt, wrap_oserror
 from pypy.module._cffi_backend import cdataobj, misc, ctypeprim, ctypevoid
 from pypy.module._cffi_backend.ctypeobj import W_CType
@@ -98,10 +100,11 @@
                 raise oefmt(space.w_IndexError,
                             "initializer unicode string is too long for '%s' "
                             "(got %d characters)", self.name, n)
-            unichardata = rffi.cast(rffi.CWCHARP, cdata)
-            copy_unicode_to_raw(llunicode(s), unichardata, 0, n)
+
+            unichardata = rffi.cast(utf8.WCHAR_INTP, cdata)
+            s.copy_to_wcharp(unichardata, 0, n)
             if n != self.length:
-                unichardata[n] = u'\x00'
+                unichardata[n] = utf8.wchar_rint(0)
         else:
             raise self._convert_error("list or tuple", w_ob)
 
@@ -131,9 +134,9 @@
             if self.is_unichar_ptr_or_array():
                 cdata = rffi.cast(rffi.CWCHARP, cdata)
                 if length < 0:
-                    u = rffi.wcharp2unicode(cdata)
+                    u = Utf8Str.from_wcharp(cdata)
                 else:
-                    u = rffi.wcharp2unicoden(cdata, length)
+                    u = Utf8Str.from_wcharpn(cdata, length)
                 keepalive_until_here(cdataobj)
                 return space.wrap(u)
         #
diff --git a/pypy/module/_multibytecodec/c_codecs.py b/pypy/module/_multibytecodec/c_codecs.py
--- a/pypy/module/_multibytecodec/c_codecs.py
+++ b/pypy/module/_multibytecodec/c_codecs.py
@@ -172,7 +172,7 @@
         assert errorcb
         replace, end = errorcb(errors, namecb, reason,
                                stringdata, start, end)
-    inbuf = replace.copy_to_wcharp()
+    inbuf = replace.copy_to_new_wcharp()
     try:
         r = pypy_cjk_dec_replace_on_error(decodebuf, inbuf, len(replace), end)
     finally:
@@ -223,7 +223,7 @@
 def encodeex(encodebuf, unicodedata, errors="strict", errorcb=None,
              namecb=None, ignore_error=0):
     inleft = len(unicodedata)
-    inbuf = unicodedata.copy_to_wcharp()
+    inbuf = unicodedata.copy_to_new_wcharp()
     try:
         if pypy_cjk_enc_init(encodebuf, inbuf, inleft) < 0:
             raise MemoryError
diff --git a/pypy/module/_rawffi/alt/interp_funcptr.py b/pypy/module/_rawffi/alt/interp_funcptr.py
--- a/pypy/module/_rawffi/alt/interp_funcptr.py
+++ b/pypy/module/_rawffi/alt/interp_funcptr.py
@@ -168,7 +168,7 @@
         self.argchain.arg(addr)
 
     def handle_unichar_p(self, w_ffitype, w_obj, unicodeval):
-        buf = unicodeval.copy_to_wcharp()
+        buf = unicodeval.copy_to_new_wcharp()
         self.w_func.to_free.append(rffi.cast(rffi.VOIDP, buf))
         addr = rffi.cast(rffi.ULONG, buf)
         self.argchain.arg(addr)
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -210,7 +210,7 @@
         # Copy unicode buffer
         w_unicode = from_ref(space, ref)
         u = space.unicode_w(w_unicode)
-        ref_unicode.c_buffer = u.copy_to_wcharp()
+        ref_unicode.c_buffer = u.copy_to_new_wcharp()
     return ref_unicode.c_buffer
 
 @cpython_api([PyObject], rffi.CWCHARP)