[Python-checkins] r67939 - python/branches/py3k/Objects/unicodeobject.c
M.-A. Lemburg
mal at egenix.com
Sat Dec 27 14:07:42 CET 2008
Alexandre,
Could you please point me to the ticket or discussion of this
change?
While I agree with the change (codecs should not use or return
mutable byte arrays), I do think that such changes must get some
more attention before being checked in.
Thanks.
On 2008-12-27 10:16, alexandre.vassalotti wrote:
> Author: alexandre.vassalotti
> Date: Sat Dec 27 10:16:49 2008
> New Revision: 67939
>
> Log:
> Optimize built-in unicode codecs by avoiding unnecessary copying.
>
> The approach used is similar to what is currently used in the version
> of unicodeobject.c in Python 2.x. The only difference is we use
> _PyBytes_Resize instead of _PyString_Resize.
>
>
> Modified:
> python/branches/py3k/Objects/unicodeobject.c
>
> Modified: python/branches/py3k/Objects/unicodeobject.c
> ==============================================================================
> --- python/branches/py3k/Objects/unicodeobject.c (original)
> +++ python/branches/py3k/Objects/unicodeobject.c Sat Dec 27 10:16:49 2008
> @@ -1873,7 +1873,7 @@
> int encodeWhiteSpace,
> const char *errors)
> {
> - PyObject *v, *result;
> + PyObject *v;
> /* It might be possible to tighten this worst case */
> Py_ssize_t cbAllocated = 5 * size;
> int inShift = 0;
> @@ -1889,11 +1889,11 @@
> if (cbAllocated / 5 != size)
> return PyErr_NoMemory();
>
> - v = PyByteArray_FromStringAndSize(NULL, cbAllocated);
> + v = PyBytes_FromStringAndSize(NULL, cbAllocated);
> if (v == NULL)
> return NULL;
>
> - start = out = PyByteArray_AS_STRING(v);
> + start = out = PyBytes_AS_STRING(v);
> for (;i < size; ++i) {
> Py_UNICODE ch = s[i];
>
> @@ -1958,10 +1958,9 @@
> *out++= B64(charsleft << (6-bitsleft) );
> *out++ = '-';
> }
> -
> - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), out - start);
> - Py_DECREF(v);
> - return result;
> + if (_PyBytes_Resize(&v, out - start) < 0)
> + return NULL;
> + return v;
> }
>
> #undef SPECIAL
> @@ -2479,7 +2478,7 @@
> const char *errors,
> int byteorder)
> {
> - PyObject *v, *result;
> + PyObject *v;
> unsigned char *p;
> Py_ssize_t nsize, bytesize;
> #ifndef Py_UNICODE_WIDE
> @@ -2515,11 +2514,11 @@
> bytesize = nsize * 4;
> if (bytesize / 4 != nsize)
> return PyErr_NoMemory();
> - v = PyByteArray_FromStringAndSize(NULL, bytesize);
> + v = PyBytes_FromStringAndSize(NULL, bytesize);
> if (v == NULL)
> return NULL;
>
> - p = (unsigned char *)PyByteArray_AS_STRING(v);
> + p = (unsigned char *)PyBytes_AS_STRING(v);
> if (byteorder == 0)
> STORECHAR(0xFEFF);
> if (size == 0)
> @@ -2556,9 +2555,7 @@
> }
>
> done:
> - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
> - Py_DECREF(v);
> - return result;
> + return v;
> #undef STORECHAR
> }
>
> @@ -2757,7 +2754,7 @@
> const char *errors,
> int byteorder)
> {
> - PyObject *v, *result;
> + PyObject *v;
> unsigned char *p;
> Py_ssize_t nsize, bytesize;
> #ifdef Py_UNICODE_WIDE
> @@ -2792,11 +2789,11 @@
> bytesize = nsize * 2;
> if (bytesize / 2 != nsize)
> return PyErr_NoMemory();
> - v = PyByteArray_FromStringAndSize(NULL, bytesize);
> + v = PyBytes_FromStringAndSize(NULL, bytesize);
> if (v == NULL)
> return NULL;
>
> - p = (unsigned char *)PyByteArray_AS_STRING(v);
> + p = (unsigned char *)PyBytes_AS_STRING(v);
> if (byteorder == 0)
> STORECHAR(0xFEFF);
> if (size == 0)
> @@ -2828,9 +2825,7 @@
> }
>
> done:
> - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
> - Py_DECREF(v);
> - return result;
> + return v;
> #undef STORECHAR
> }
>
> @@ -3120,7 +3115,7 @@
> PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
> Py_ssize_t size)
> {
> - PyObject *repr, *result;
> + PyObject *repr;
> char *p;
>
> #ifdef Py_UNICODE_WIDE
> @@ -3147,17 +3142,20 @@
> escape.
> */
>
> + if (size == 0)
> + return PyBytes_FromStringAndSize(NULL, 0);
> +
> if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
> return PyErr_NoMemory();
>
> - repr = PyByteArray_FromStringAndSize(NULL,
> + repr = PyBytes_FromStringAndSize(NULL,
> 2
> + expandsize*size
> + 1);
> if (repr == NULL)
> return NULL;
>
> - p = PyByteArray_AS_STRING(repr);
> + p = PyBytes_AS_STRING(repr);
>
> while (size-- > 0) {
> Py_UNICODE ch = *s++;
> @@ -3249,13 +3247,13 @@
> *p++ = (char) ch;
> }
>
> - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr),
> - p - PyByteArray_AS_STRING(repr));
> - Py_DECREF(repr);
> - return result;
> + assert(p - PyBytes_AS_STRING(repr) > 0);
> + if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
> + return NULL;
> + return repr;
> }
>
> -PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
> +PyObject *PyUnicodeAsUnicodeEscapeString(PyObject *unicode)
> {
> PyObject *s;
> if (!PyUnicode_Check(unicode)) {
> @@ -3389,7 +3387,7 @@
> PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
> Py_ssize_t size)
> {
> - PyObject *repr, *result;
> + PyObject *repr;
> char *p;
> char *q;
>
> @@ -3402,13 +3400,13 @@
> if (size > PY_SSIZE_T_MAX / expandsize)
> return PyErr_NoMemory();
>
> - repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
> + repr = PyBytes_FromStringAndSize(NULL, expandsize * size);
> if (repr == NULL)
> return NULL;
> if (size == 0)
> - goto done;
> + return repr;
>
> - p = q = PyByteArray_AS_STRING(repr);
> + p = q = PyBytes_AS_STRING(repr);
> while (size-- > 0) {
> Py_UNICODE ch = *s++;
> #ifdef Py_UNICODE_WIDE
> @@ -3468,10 +3466,10 @@
> }
> size = p - q;
>
> - done:
> - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
> - Py_DECREF(repr);
> - return result;
> + assert(size > 0);
> + if (_PyBytes_Resize(&repr, size) < 0)
> + return NULL;
> + return repr;
> }
>
> PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
> @@ -3706,7 +3704,6 @@
> const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
> PyObject *errorHandler = NULL;
> PyObject *exc = NULL;
> - PyObject *result = NULL;
> /* the following variable is used for caching string comparisons
> * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
> int known_errorHandler = -1;
> @@ -3715,10 +3712,10 @@
> replacements, if we need more, we'll resize */
> if (size == 0)
> return PyBytes_FromStringAndSize(NULL, 0);
> - res = PyByteArray_FromStringAndSize(NULL, size);
> + res = PyBytes_FromStringAndSize(NULL, size);
> if (res == NULL)
> return NULL;
> - str = PyByteArray_AS_STRING(res);
> + str = PyBytes_AS_STRING(res);
> ressize = size;
>
> while (p<endp) {
> @@ -3768,7 +3765,7 @@
> p = collend;
> break;
> case 4: /* xmlcharrefreplace */
> - respos = str - PyByteArray_AS_STRING(res);
> + respos = str - PyBytes_AS_STRING(res);
> /* determine replacement size (temporarily (mis)uses p) */
> for (p = collstart, repsize = 0; p < collend; ++p) {
> if (*p<10)
> @@ -3795,9 +3792,9 @@
> if (requiredsize > ressize) {
> if (requiredsize<2*ressize)
> requiredsize = 2*ressize;
> - if (PyByteArray_Resize(res, requiredsize))
> + if (_PyBytes_Resize(&res, requiredsize))
> goto onError;
> - str = PyByteArray_AS_STRING(res) + respos;
> + str = PyBytes_AS_STRING(res) + respos;
> ressize = requiredsize;
> }
> /* generate replacement (temporarily (mis)uses p) */
> @@ -3815,17 +3812,17 @@
> /* need more space? (at least enough for what we
> have+the replacement+the rest of the string, so
> we won't have to check space for encodable characters) */
> - respos = str - PyByteArray_AS_STRING(res);
> + respos = str - PyBytes_AS_STRING(res);
> repsize = PyUnicode_GET_SIZE(repunicode);
> requiredsize = respos+repsize+(endp-collend);
> if (requiredsize > ressize) {
> if (requiredsize<2*ressize)
> requiredsize = 2*ressize;
> - if (PyByteArray_Resize(res, requiredsize)) {
> + if (_PyBytes_Resize(&res, requiredsize)) {
> Py_DECREF(repunicode);
> goto onError;
> }
> - str = PyByteArray_AS_STRING(res) + respos;
> + str = PyBytes_AS_STRING(res) + respos;
> ressize = requiredsize;
> }
> /* check if there is anything unencodable in the replacement
> @@ -3845,13 +3842,23 @@
> }
> }
> }
> - result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(res),
> - str - PyByteArray_AS_STRING(res));
> + /* Resize if we allocated to much */
> + size = str - PyBytes_AS_STRING(res);
> + if (size < ressize) { /* If this falls res will be NULL */
> + assert(size > 0);
> + if (_PyBytes_Resize(&res, size) < 0)
> + goto onError;
> + }
> +
> + Py_XDECREF(errorHandler);
> + Py_XDECREF(exc);
> + return res;
> +
> onError:
> - Py_DECREF(res);
> + Py_XDECREF(res);
> Py_XDECREF(errorHandler);
> Py_XDECREF(exc);
> - return result;
> + return NULL;
> }
>
> PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
> @@ -4104,7 +4111,7 @@
> else {
> /* Extend string object */
> n = PyBytes_Size(*repr);
> - if (_PyBytes_Resize(repr, n + mbcssize) < 0)
> + if (_PyBytes_Resize(&repr, n + mbcssize) < 0)
> return -1;
> }
>
> @@ -4834,7 +4841,8 @@
>
> /* Resize if we allocated to much */
> if (respos<PyBytes_GET_SIZE(res))
> - _PyBytes_Resize(&res, respos);
> + if (_PyBytes_Resize(&res, respos) < 0)
> + goto onError;
>
> Py_XDECREF(exc);
> Py_XDECREF(errorHandler);
> _______________________________________________
> Python-checkins mailing list
> Python-checkins at python.org
> http://mail.python.org/mailman/listinfo/python-checkins
--
Marc-Andre Lemburg
eGenix.com
Professional Python Services directly from the Source (#1, Dec 27 2008)
>>> Python/Zope Consulting and Support ... http://www.egenix.com/
>>> mxODBC.Zope.Database.Adapter ... http://zope.egenix.com/
>>> mxODBC, mxDateTime, mxTextTools ... http://python.egenix.com/
________________________________________________________________________
2008-12-02: Released mxODBC.Connect 1.0.0 http://python.egenix.com/
::: Try our new mxODBC.Connect Python Database Interface for free ! ::::
eGenix.com Software, Skills and Services GmbH Pastor-Loeh-Str.48
D-40764 Langenfeld, Germany. CEO Dipl.-Math. Marc-Andre Lemburg
Registered at Amtsgericht Duesseldorf: HRB 46611
http://www.egenix.com/company/contact/
More information about the Python-checkins
mailing list