[pypy-svn] pypy default: Add support for PyUnicode_FromUnicode(NULL, size), which allocates a (temporarily) mutable unicode string.
amauryfa
commits-noreply at bitbucket.org
Sat Mar 26 01:12:49 CET 2011
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch:
Changeset: r42951:88b090e851cc
Date: 2011-03-26 01:12 +0100
http://bitbucket.org/pypy/pypy/changeset/88b090e851cc/
Log: Add support for PyUnicode_FromUnicode(NULL, size), which allocates a
(temporarily) mutable unicode string. Also implement
PyUnicode_Resize.
See comments in stringobject.py for a complete explanation
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -1,9 +1,81 @@
# encoding: iso-8859-15
from pypy.module.cpyext.test.test_api import BaseApiTest
-from pypy.module.cpyext.unicodeobject import Py_UNICODE
+from pypy.module.cpyext.test.test_cpyext import AppTestCpythonExtensionBase
+from pypy.module.cpyext.unicodeobject import (
+ Py_UNICODE, PyUnicodeObject, new_empty_unicode)
+from pypy.module.cpyext.api import PyObjectP, PyObject
+from pypy.module.cpyext.pyobject import Py_DecRef
from pypy.rpython.lltypesystem import rffi, lltype
import sys, py
+class AppTestUnicodeObject(AppTestCpythonExtensionBase):
+ def test_unicodeobject(self):
+ module = self.import_extension('foo', [
+ ("get_hello1", "METH_NOARGS",
+ """
+ return PyUnicode_FromStringAndSize(
+ "Hello world<should not be included>", 11);
+ """),
+ ("test_GetSize", "METH_NOARGS",
+ """
+ PyObject* s = PyUnicode_FromString("Hello world");
+ int result = 0;
+
+ if(PyUnicode_GetSize(s) == 11) {
+ result = 1;
+ }
+ if(s->ob_type->tp_basicsize != sizeof(void*)*4)
+ result = 0;
+ Py_DECREF(s);
+ return PyBool_FromLong(result);
+ """),
+ ("test_GetSize_exception", "METH_NOARGS",
+ """
+ PyObject* f = PyFloat_FromDouble(1.0);
+ Py_ssize_t size = PyUnicode_GetSize(f);
+
+ Py_DECREF(f);
+ return NULL;
+ """),
+ ("test_is_unicode", "METH_VARARGS",
+ """
+ return PyBool_FromLong(PyUnicode_Check(PyTuple_GetItem(args, 0)));
+ """)])
+ assert module.get_hello1() == u'Hello world'
+ assert module.test_GetSize()
+ raises(TypeError, module.test_GetSize_exception)
+
+ assert module.test_is_unicode(u"")
+ assert not module.test_is_unicode(())
+
+ def test_unicode_buffer_init(self):
+ module = self.import_extension('foo', [
+ ("getunicode", "METH_NOARGS",
+ """
+ PyObject *s, *t;
+ Py_UNICODE* c;
+ Py_ssize_t len;
+
+ s = PyUnicode_FromUnicode(NULL, 4);
+ if (s == NULL)
+ return NULL;
+ t = PyUnicode_FromUnicode(NULL, 3);
+ if (t == NULL)
+ return NULL;
+ Py_DECREF(t);
+ c = PyUnicode_AsUnicode(s);
+ c[0] = 'a';
+ c[1] = 0xe9;
+ c[3] = 'c';
+ return s;
+ """),
+ ])
+ s = module.getunicode()
+ assert len(s) == 4
+ assert s == u'aé\x00c'
+
+
+
class TestUnicode(BaseApiTest):
def test_unicodeobject(self, space, api):
assert api.PyUnicode_GET_SIZE(space.wrap(u'späm')) == 4
@@ -77,6 +149,28 @@
assert space.unwrap(w_res) == u'spä'
rffi.free_charp(s)
+ def test_unicode_resize(self, space, api):
+ py_uni = new_empty_unicode(space, 10)
+ ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
+ py_uni.c_buffer[0] = u'a'
+ py_uni.c_buffer[1] = u'b'
+ py_uni.c_buffer[2] = u'c'
+ ar[0] = rffi.cast(PyObject, py_uni)
+ api.PyUnicode_Resize(ar, 3)
+ py_uni = rffi.cast(PyUnicodeObject, ar[0])
+ assert py_uni.c_size == 3
+ assert py_uni.c_buffer[1] == u'b'
+ assert py_uni.c_buffer[3] == u'\x00'
+ # the same for growing
+ ar[0] = rffi.cast(PyObject, py_uni)
+ api.PyUnicode_Resize(ar, 10)
+ py_uni = rffi.cast(PyUnicodeObject, ar[0])
+ assert py_uni.c_size == 10
+ assert py_uni.c_buffer[1] == 'b'
+ assert py_uni.c_buffer[10] == '\x00'
+ Py_DecRef(space, ar[0])
+ lltype.free(ar, flavor='raw')
+
def test_AsUTF8String(self, space, api):
w_u = space.wrap(u'späm')
w_res = api.PyUnicode_AsUTF8String(w_u)
@@ -235,13 +329,13 @@
x_chunk = api.PyUnicode_AS_UNICODE(w_x)
api.Py_UNICODE_COPY(target_chunk, x_chunk, 4)
- w_y = api.PyUnicode_FromUnicode(target_chunk, 4)
+ w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, 4))
assert space.eq_w(w_y, space.wrap(u"abcd"))
size = api.PyUnicode_GET_SIZE(w_x)
api.Py_UNICODE_COPY(target_chunk, x_chunk, size)
- w_y = api.PyUnicode_FromUnicode(target_chunk, size)
+ w_y = space.wrap(rffi.wcharpsize2unicode(target_chunk, size))
assert space.eq_w(w_y, w_x)
diff --git a/pypy/module/cpyext/stringobject.py b/pypy/module/cpyext/stringobject.py
--- a/pypy/module/cpyext/stringobject.py
+++ b/pypy/module/cpyext/stringobject.py
@@ -15,7 +15,7 @@
## The problem
## -----------
##
-## PyString_AsString() must returns a (non-movable) pointer to the underlying
+## PyString_AsString() must return a (non-movable) pointer to the underlying
## buffer, whereas pypy strings are movable. C code may temporarily store
## this address and use it, as long as it owns a reference to the PyObject.
## There is no "release" function to specify that the pointer is not needed
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -7,15 +7,16 @@
bootstrap_function, PyObjectFields, cpython_struct, CONST_STRING,
CONST_WSTRING)
from pypy.module.cpyext.pyerrors import PyErr_BadArgument
-from pypy.module.cpyext.pyobject import PyObject, from_ref, make_typedescr
+from pypy.module.cpyext.pyobject import (
+ PyObject, PyObjectP, Py_DecRef, make_ref, from_ref, track_reference,
+ make_typedescr, get_typedescr)
from pypy.module.cpyext.stringobject import PyString_Check
from pypy.module.sys.interp_encoding import setdefaultencoding
from pypy.objspace.std import unicodeobject, unicodetype
from pypy.rlib import runicode
import sys
-## See comment in stringobject.py. PyUnicode_FromUnicode(NULL, size) is not
-## yet supported.
+## See comment in stringobject.py.
PyUnicodeObjectStruct = lltype.ForwardReference()
PyUnicodeObject = lltype.Ptr(PyUnicodeObjectStruct)
@@ -28,7 +29,8 @@
make_typedescr(space.w_unicode.instancetypedef,
basestruct=PyUnicodeObject.TO,
attach=unicode_attach,
- dealloc=unicode_dealloc)
+ dealloc=unicode_dealloc,
+ realize=unicode_realize)
# Buffer for the default encoding (used by PyUnicde_GetDefaultEncoding)
DEFAULT_ENCODING_SIZE = 100
@@ -39,12 +41,39 @@
Py_UNICODE = lltype.UniChar
+def new_empty_unicode(space, length):
+ """
+ Allocates a PyUnicodeObject and its buffer, but without a corresponding
+ interpreter object. The buffer may be mutated, until unicode_realize() is
+ called.
+ """
+ typedescr = get_typedescr(space.w_unicode.instancetypedef)
+ py_obj = typedescr.allocate(space, space.w_unicode)
+ py_uni = rffi.cast(PyUnicodeObject, py_obj)
+
+ buflen = length + 1
+ py_uni.c_size = length
+ py_uni.c_buffer = lltype.malloc(rffi.CWCHARP.TO, buflen,
+ flavor='raw', zero=True)
+ return py_uni
+
def unicode_attach(space, py_obj, w_obj):
"Fills a newly allocated PyUnicodeObject with a unicode string"
py_unicode = rffi.cast(PyUnicodeObject, py_obj)
py_unicode.c_size = len(space.unicode_w(w_obj))
py_unicode.c_buffer = lltype.nullptr(rffi.CWCHARP.TO)
+def unicode_realize(space, py_obj):
+ """
+ Creates the unicode in the interpreter. The PyUnicodeObject buffer must not
+ be modified after this call.
+ """
+ py_uni = rffi.cast(PyUnicodeObject, py_obj)
+ s = rffi.wcharpsize2unicode(py_uni.c_buffer, py_uni.c_size)
+ w_obj = space.wrap(s)
+ track_reference(space, py_obj, w_obj)
+ return w_obj
+
@cpython_api([PyObject], lltype.Void, external=False)
def unicode_dealloc(space, py_obj):
py_unicode = rffi.cast(PyUnicodeObject, py_obj)
@@ -128,7 +157,9 @@
def PyUnicode_AsUnicode(space, ref):
"""Return a read-only pointer to the Unicode object's internal Py_UNICODE
buffer, NULL if unicode is not a Unicode object."""
- if not PyUnicode_Check(space, ref):
+ # Don't use PyUnicode_Check, it will realize the object :-(
+ w_type = from_ref(space, rffi.cast(PyObject, ref.c_ob_type))
+ if not space.is_true(space.issubtype(w_type, space.w_unicode)):
raise OperationError(space.w_TypeError,
space.wrap("expected unicode object"))
return PyUnicode_AS_UNICODE(space, ref)
@@ -237,10 +268,11 @@
object. If the buffer is not NULL, the return value might be a shared object.
Therefore, modification of the resulting Unicode object is only allowed when u
is NULL."""
- if not wchar_p:
- raise NotImplementedError
- s = rffi.wcharpsize2unicode(wchar_p, length)
- return space.wrap(s)
+ if wchar_p:
+ s = rffi.wcharpsize2unicode(wchar_p, length)
+ return make_ref(space, space.wrap(s))
+ else:
+ return rffi.cast(PyObject, new_empty_unicode(space, length))
@cpython_api([CONST_WSTRING, Py_ssize_t], PyObject)
def PyUnicode_FromWideChar(space, wchar_p, length):
@@ -330,6 +362,29 @@
w_str = space.wrap(rffi.charpsize2str(s, size))
return space.call_method(w_str, 'decode', space.wrap("utf-8"))
+@cpython_api([PyObjectP, Py_ssize_t], rffi.INT_real, error=-1)
+def PyUnicode_Resize(space, ref, newsize):
+ # XXX always create a new string so far
+ py_uni = rffi.cast(PyUnicodeObject, ref[0])
+ if not py_uni.c_buffer:
+ raise OperationError(space.w_SystemError, space.wrap(
+ "PyUnicode_Resize called on already created string"))
+ try:
+ py_newuni = new_empty_unicode(space, newsize)
+ except MemoryError:
+ Py_DecRef(space, ref[0])
+ ref[0] = lltype.nullptr(PyObject.TO)
+ raise
+ to_cp = newsize
+ oldsize = py_uni.c_size
+ if oldsize < newsize:
+ to_cp = oldsize
+ for i in range(to_cp):
+ py_newuni.c_buffer[i] = py_uni.c_buffer[i]
+ Py_DecRef(space, ref[0])
+ ref[0] = rffi.cast(PyObject, py_newuni)
+ return 0
+
@cpython_api([PyObject], PyObject)
def PyUnicode_AsUTF8String(space, w_unicode):
"""Encode a Unicode object using UTF-8 and return the result as Python string
More information about the Pypy-commit
mailing list