[pypy-commit] pypy default: Cherry-pick a few changes from the cpyext-ext branch:
amauryfa
pypy.commits at gmail.com
Mon Mar 21 13:09:02 EDT 2016
Author: Amaury Forgeot d'Arc <amauryfa at gmail.com>
Branch:
Changeset: r83223:edf35def96ce
Date: 2016-03-21 17:55 +0100
http://bitbucket.org/pypy/pypy/changeset/edf35def96ce/
Log: Cherry-pick a few changes from the cpyext-ext branch: 3df26326119c
43629fab94e1 931af853eaab
- expose "defenc" and "hash" fields of PyUnicodeObject
- Allow PyString_AsString to process unicode objects. The "defenc"
field is returned.
diff --git a/pypy/module/cpyext/bytesobject.py b/pypy/module/cpyext/bytesobject.py
--- a/pypy/module/cpyext/bytesobject.py
+++ b/pypy/module/cpyext/bytesobject.py
@@ -1,4 +1,4 @@
-from pypy.interpreter.error import OperationError
+from pypy.interpreter.error import OperationError, oefmt
from rpython.rtyper.lltypesystem import rffi, lltype
from pypy.module.cpyext.api import (
cpython_api, cpython_struct, bootstrap_function, build_type_checkers,
@@ -134,8 +134,14 @@
if from_ref(space, rffi.cast(PyObject, ref.c_ob_type)) is space.w_str:
pass # typecheck returned "ok" without forcing 'ref' at all
elif not PyString_Check(space, ref): # otherwise, use the alternate way
- raise OperationError(space.w_TypeError, space.wrap(
- "PyString_AsString only support strings"))
+ from pypy.module.cpyext.unicodeobject import (
+ PyUnicode_Check, _PyUnicode_AsDefaultEncodedString)
+ if PyUnicode_Check(space, ref):
+ ref = _PyUnicode_AsDefaultEncodedString(space, ref, None)
+ else:
+ raise oefmt(space.w_TypeError,
+ "expected string or Unicode object, %T found",
+ from_ref(space, ref))
ref_str = rffi.cast(PyStringObject, ref)
if not ref_str.c_buffer:
# copy string buffer
@@ -147,8 +153,14 @@
@cpython_api([PyObject, rffi.CCHARPP, rffi.CArrayPtr(Py_ssize_t)], rffi.INT_real, error=-1)
def PyString_AsStringAndSize(space, ref, buffer, length):
if not PyString_Check(space, ref):
- raise OperationError(space.w_TypeError, space.wrap(
- "PyString_AsStringAndSize only support strings"))
+ from pypy.module.cpyext.unicodeobject import (
+ PyUnicode_Check, _PyUnicode_AsDefaultEncodedString)
+ if PyUnicode_Check(space, ref):
+ ref = _PyUnicode_AsDefaultEncodedString(space, ref, None)
+ else:
+ raise oefmt(space.w_TypeError,
+ "expected string or Unicode object, %T found",
+ from_ref(space, ref))
ref_str = rffi.cast(PyStringObject, ref)
if not ref_str.c_buffer:
# copy string buffer
diff --git a/pypy/module/cpyext/include/unicodeobject.h b/pypy/module/cpyext/include/unicodeobject.h
--- a/pypy/module/cpyext/include/unicodeobject.h
+++ b/pypy/module/cpyext/include/unicodeobject.h
@@ -20,8 +20,12 @@
typedef struct {
PyObject_HEAD
- Py_UNICODE *buffer;
+ Py_UNICODE *str;
Py_ssize_t size;
+ long hash; /* Hash value; -1 if not set */
+ PyObject *defenc; /* (Default) Encoded version as Python
+ string, or NULL; this is used for
+ implementing the buffer protocol */
} PyUnicodeObject;
diff --git a/pypy/module/cpyext/test/test_bytesobject.py b/pypy/module/cpyext/test/test_bytesobject.py
--- a/pypy/module/cpyext/test/test_bytesobject.py
+++ b/pypy/module/cpyext/test/test_bytesobject.py
@@ -139,6 +139,44 @@
])
module.getstring()
+ def test_py_string_as_string_Unicode(self):
+ module = self.import_extension('foo', [
+ ("getstring_unicode", "METH_NOARGS",
+ """
+ Py_UNICODE chars[] = {'t', 'e', 's', 't'};
+ PyObject* u1 = PyUnicode_FromUnicode(chars, 4);
+ char *buf;
+ buf = PyString_AsString(u1);
+ if (buf == NULL)
+ return NULL;
+ if (buf[3] != 't') {
+ PyErr_SetString(PyExc_AssertionError, "Bad conversion");
+ return NULL;
+ }
+ Py_DECREF(u1);
+ Py_INCREF(Py_None);
+ return Py_None;
+ """),
+ ("getstringandsize_unicode", "METH_NOARGS",
+ """
+ Py_UNICODE chars[] = {'t', 'e', 's', 't'};
+ PyObject* u1 = PyUnicode_FromUnicode(chars, 4);
+ char *buf;
+ Py_ssize_t len;
+ if (PyString_AsStringAndSize(u1, &buf, &len) < 0)
+ return NULL;
+ if (len != 4) {
+ PyErr_SetString(PyExc_AssertionError, "Bad Length");
+ return NULL;
+ }
+ Py_DECREF(u1);
+ Py_INCREF(Py_None);
+ return Py_None;
+ """),
+ ])
+ module.getstring_unicode()
+ module.getstringandsize_unicode()
+
def test_format_v(self):
module = self.import_extension('foo', [
("test_string_format_v", "METH_VARARGS",
diff --git a/pypy/module/cpyext/test/test_unicodeobject.py b/pypy/module/cpyext/test/test_unicodeobject.py
--- a/pypy/module/cpyext/test/test_unicodeobject.py
+++ b/pypy/module/cpyext/test/test_unicodeobject.py
@@ -24,7 +24,7 @@
if(PyUnicode_GetSize(s) == 11) {
result = 1;
}
- if(s->ob_type->tp_basicsize != sizeof(void*)*5)
+ if(s->ob_type->tp_basicsize != sizeof(void*)*7)
result = 0;
Py_DECREF(s);
return PyBool_FromLong(result);
@@ -66,6 +66,7 @@
c = PyUnicode_AsUnicode(s);
c[0] = 'a';
c[1] = 0xe9;
+ c[2] = 0x00;
c[3] = 'c';
return s;
"""),
@@ -74,7 +75,35 @@
assert len(s) == 4
assert s == u'a\xe9\x00c'
+ def test_hash(self):
+ module = self.import_extension('foo', [
+ ("test_hash", "METH_VARARGS",
+ '''
+ PyObject* obj = (PyTuple_GetItem(args, 0));
+ long hash = ((PyUnicodeObject*)obj)->hash;
+ return PyLong_FromLong(hash);
+ '''
+ ),
+ ])
+ res = module.test_hash(u"xyz")
+ assert res == hash(u'xyz')
+ def test_default_encoded_string(self):
+ module = self.import_extension('foo', [
+ ("test_default_encoded_string", "METH_O",
+ '''
+ PyObject* result = _PyUnicode_AsDefaultEncodedString(args, "replace");
+ Py_INCREF(result);
+ return result;
+ '''
+ ),
+ ])
+ res = module.test_default_encoded_string(u"xyz")
+ assert isinstance(res, str)
+ assert res == 'xyz'
+ res = module.test_default_encoded_string(u"caf\xe9")
+ assert isinstance(res, str)
+ assert res == 'caf?'
class TestUnicode(BaseApiTest):
def test_unicodeobject(self, space, api):
@@ -155,22 +184,22 @@
def test_unicode_resize(self, space, api):
py_uni = new_empty_unicode(space, 10)
ar = lltype.malloc(PyObjectP.TO, 1, flavor='raw')
- py_uni.c_buffer[0] = u'a'
- py_uni.c_buffer[1] = u'b'
- py_uni.c_buffer[2] = u'c'
+ py_uni.c_str[0] = u'a'
+ py_uni.c_str[1] = u'b'
+ py_uni.c_str[2] = u'c'
ar[0] = rffi.cast(PyObject, py_uni)
api.PyUnicode_Resize(ar, 3)
py_uni = rffi.cast(PyUnicodeObject, ar[0])
assert py_uni.c_size == 3
- assert py_uni.c_buffer[1] == u'b'
- assert py_uni.c_buffer[3] == u'\x00'
+ assert py_uni.c_str[1] == u'b'
+ assert py_uni.c_str[3] == u'\x00'
# the same for growing
ar[0] = rffi.cast(PyObject, py_uni)
api.PyUnicode_Resize(ar, 10)
py_uni = rffi.cast(PyUnicodeObject, ar[0])
assert py_uni.c_size == 10
- assert py_uni.c_buffer[1] == 'b'
- assert py_uni.c_buffer[10] == '\x00'
+ assert py_uni.c_str[1] == 'b'
+ assert py_uni.c_str[10] == '\x00'
Py_DecRef(space, ar[0])
lltype.free(ar, flavor='raw')
diff --git a/pypy/module/cpyext/unicodeobject.py b/pypy/module/cpyext/unicodeobject.py
--- a/pypy/module/cpyext/unicodeobject.py
+++ b/pypy/module/cpyext/unicodeobject.py
@@ -22,7 +22,8 @@
PyUnicodeObjectStruct = lltype.ForwardReference()
PyUnicodeObject = lltype.Ptr(PyUnicodeObjectStruct)
PyUnicodeObjectFields = (PyObjectFields +
- (("buffer", rffi.CWCHARP), ("size", Py_ssize_t)))
+ (("str", rffi.CWCHARP), ("size", Py_ssize_t),
+ ("hash", rffi.LONG), ("defenc", PyObject)))
cpython_struct("PyUnicodeObject", PyUnicodeObjectFields, PyUnicodeObjectStruct)
@bootstrap_function
@@ -54,16 +55,20 @@
buflen = length + 1
py_uni.c_size = length
- py_uni.c_buffer = lltype.malloc(rffi.CWCHARP.TO, buflen,
- flavor='raw', zero=True,
- add_memory_pressure=True)
+ py_uni.c_str = lltype.malloc(rffi.CWCHARP.TO, buflen,
+ flavor='raw', zero=True,
+ add_memory_pressure=True)
+ py_uni.c_hash = -1
+ py_uni.c_defenc = lltype.nullptr(PyObject.TO)
return py_uni
def unicode_attach(space, py_obj, w_obj):
"Fills a newly allocated PyUnicodeObject with a unicode string"
py_unicode = rffi.cast(PyUnicodeObject, py_obj)
py_unicode.c_size = len(space.unicode_w(w_obj))
- py_unicode.c_buffer = lltype.nullptr(rffi.CWCHARP.TO)
+ py_unicode.c_str = lltype.nullptr(rffi.CWCHARP.TO)
+ py_unicode.c_hash = space.hash_w(w_obj)
+ py_unicode.c_defenc = lltype.nullptr(PyObject.TO)
def unicode_realize(space, py_obj):
"""
@@ -71,17 +76,20 @@
be modified after this call.
"""
py_uni = rffi.cast(PyUnicodeObject, py_obj)
- s = rffi.wcharpsize2unicode(py_uni.c_buffer, py_uni.c_size)
+ s = rffi.wcharpsize2unicode(py_uni.c_str, py_uni.c_size)
w_obj = space.wrap(s)
+ py_uni.c_hash = space.hash_w(w_obj)
track_reference(space, py_obj, w_obj)
return w_obj
@cpython_api([PyObject], lltype.Void, header=None)
def unicode_dealloc(space, py_obj):
py_unicode = rffi.cast(PyUnicodeObject, py_obj)
- if py_unicode.c_buffer:
- lltype.free(py_unicode.c_buffer, flavor="raw")
+ if py_unicode.c_str:
+ lltype.free(py_unicode.c_str, flavor="raw")
from pypy.module.cpyext.object import PyObject_dealloc
+ if py_unicode.c_defenc:
+ PyObject_dealloc(space, py_unicode.c_defenc)
PyObject_dealloc(space, py_obj)
@cpython_api([Py_UNICODE], rffi.INT_real, error=CANNOT_FAIL)
@@ -205,12 +213,12 @@
"""Return a pointer to the internal Py_UNICODE buffer of the object. ref
has to be a PyUnicodeObject (not checked)."""
ref_unicode = rffi.cast(PyUnicodeObject, ref)
- if not ref_unicode.c_buffer:
+ if not ref_unicode.c_str:
# Copy unicode buffer
w_unicode = from_ref(space, ref)
u = space.unicode_w(w_unicode)
- ref_unicode.c_buffer = rffi.unicode2wcharp(u)
- return ref_unicode.c_buffer
+ ref_unicode.c_str = rffi.unicode2wcharp(u)
+ return ref_unicode.c_str
@cpython_api([PyObject], rffi.CWCHARP)
def PyUnicode_AsUnicode(space, ref):
@@ -241,7 +249,7 @@
string may or may not be 0-terminated. It is the responsibility of the caller
to make sure that the wchar_t string is 0-terminated in case this is
required by the application."""
- c_buffer = PyUnicode_AS_UNICODE(space, rffi.cast(PyObject, ref))
+ c_str = PyUnicode_AS_UNICODE(space, rffi.cast(PyObject, ref))
c_size = ref.c_size
# If possible, try to copy the 0-termination as well
@@ -251,7 +259,7 @@
i = 0
while i < size:
- buf[i] = c_buffer[i]
+ buf[i] = c_str[i]
i += 1
if size > c_size:
@@ -343,8 +351,15 @@
return PyUnicode_FromUnicode(space, wchar_p, length)
@cpython_api([PyObject, CONST_STRING], PyObject)
-def _PyUnicode_AsDefaultEncodedString(space, w_unicode, errors):
- return PyUnicode_AsEncodedString(space, w_unicode, lltype.nullptr(rffi.CCHARP.TO), errors)
+def _PyUnicode_AsDefaultEncodedString(space, ref, errors):
+ # Returns a borrowed reference.
+ py_uni = rffi.cast(PyUnicodeObject, ref)
+ if not py_uni.c_defenc:
+ py_uni.c_defenc = make_ref(
+ space, PyUnicode_AsEncodedString(
+ space, ref,
+ lltype.nullptr(rffi.CCHARP.TO), errors))
+ return py_uni.c_defenc
@cpython_api([CONST_STRING, Py_ssize_t, CONST_STRING, CONST_STRING], PyObject)
def PyUnicode_Decode(space, s, size, encoding, errors):
@@ -444,7 +459,7 @@
def PyUnicode_Resize(space, ref, newsize):
# XXX always create a new string so far
py_uni = rffi.cast(PyUnicodeObject, ref[0])
- if not py_uni.c_buffer:
+ if not py_uni.c_str:
raise OperationError(space.w_SystemError, space.wrap(
"PyUnicode_Resize called on already created string"))
try:
@@ -458,7 +473,7 @@
if oldsize < newsize:
to_cp = oldsize
for i in range(to_cp):
- py_newuni.c_buffer[i] = py_uni.c_buffer[i]
+ py_newuni.c_str[i] = py_uni.c_str[i]
Py_DecRef(space, ref[0])
ref[0] = rffi.cast(PyObject, py_newuni)
return 0
More information about the pypy-commit
mailing list