[Python-checkins] r67939 - python/branches/py3k/Objects/unicodeobject.c

Sat Dec 27 14:07:42 CET 2008

Alexandre,

Could you please point me to the ticket or discussion of this
change?

While I agree with the change (codecs should not use or return
mutable byte arrays), I do think that such changes must get some
more attention before being checked in.

Thanks.

On 2008-12-27 10:16, alexandre.vassalotti wrote:
> Author: alexandre.vassalotti
> Date: Sat Dec 27 10:16:49 2008
> New Revision: 67939
> 
> Log:
> Optimize built-in unicode codecs by avoiding unnecessary copying.
> 
> The approach used is similar to what is currently used in the version
> of unicodeobject.c in Python 2.x. The only difference is we use
> _PyBytes_Resize instead of _PyString_Resize.
> 
> 
> Modified:
>    python/branches/py3k/Objects/unicodeobject.c
> 
> Modified: python/branches/py3k/Objects/unicodeobject.c
> ==============================================================================
> --- python/branches/py3k/Objects/unicodeobject.c	(original)
> +++ python/branches/py3k/Objects/unicodeobject.c	Sat Dec 27 10:16:49 2008
> @@ -1873,7 +1873,7 @@
>                     int encodeWhiteSpace,
>                     const char *errors)
>  {
> -    PyObject *v, *result;
> +    PyObject *v;
>      /* It might be possible to tighten this worst case */
>      Py_ssize_t cbAllocated = 5 * size;
>      int inShift = 0;
> @@ -1889,11 +1889,11 @@
>      if (cbAllocated / 5 != size)
>          return PyErr_NoMemory();
>  
> -    v = PyByteArray_FromStringAndSize(NULL, cbAllocated);
> +    v = PyBytes_FromStringAndSize(NULL, cbAllocated);
>      if (v == NULL)
>          return NULL;
>  
> -    start = out = PyByteArray_AS_STRING(v);
> +    start = out = PyBytes_AS_STRING(v);
>      for (;i < size; ++i) {
>          Py_UNICODE ch = s[i];
>  
> @@ -1958,10 +1958,9 @@
>          *out++= B64(charsleft << (6-bitsleft) );
>          *out++ = '-';
>      }
> -
> -    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), out - start);
> -    Py_DECREF(v);
> -    return result;
> +    if (_PyBytes_Resize(&v, out - start) < 0)
> +        return NULL;
> +    return v;
>  }
>  
>  #undef SPECIAL
> @@ -2479,7 +2478,7 @@
>  		      const char *errors,
>  		      int byteorder)
>  {
> -    PyObject *v, *result;
> +    PyObject *v;
>      unsigned char *p;
>      Py_ssize_t nsize, bytesize;
>  #ifndef Py_UNICODE_WIDE
> @@ -2515,11 +2514,11 @@
>      bytesize = nsize * 4;
>      if (bytesize / 4 != nsize)
>  	return PyErr_NoMemory();
> -    v = PyByteArray_FromStringAndSize(NULL, bytesize);
> +    v = PyBytes_FromStringAndSize(NULL, bytesize);
>      if (v == NULL)
>          return NULL;
>  
> -    p = (unsigned char *)PyByteArray_AS_STRING(v);
> +    p = (unsigned char *)PyBytes_AS_STRING(v);
>      if (byteorder == 0)
>  	STORECHAR(0xFEFF);
>      if (size == 0)
> @@ -2556,9 +2555,7 @@
>      }
>  
>    done:
> -    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
> -    Py_DECREF(v);
> -    return result;
> +    return v;
>  #undef STORECHAR
>  }
>  
> @@ -2757,7 +2754,7 @@
>  		      const char *errors,
>  		      int byteorder)
>  {
> -    PyObject *v, *result;
> +    PyObject *v;
>      unsigned char *p;
>      Py_ssize_t nsize, bytesize;
>  #ifdef Py_UNICODE_WIDE
> @@ -2792,11 +2789,11 @@
>      bytesize = nsize * 2;
>      if (bytesize / 2 != nsize)
>  	return PyErr_NoMemory();
> -    v = PyByteArray_FromStringAndSize(NULL, bytesize);
> +    v = PyBytes_FromStringAndSize(NULL, bytesize);
>      if (v == NULL)
>          return NULL;
>  
> -    p = (unsigned char *)PyByteArray_AS_STRING(v);
> +    p = (unsigned char *)PyBytes_AS_STRING(v);
>      if (byteorder == 0)
>  	STORECHAR(0xFEFF);
>      if (size == 0)
> @@ -2828,9 +2825,7 @@
>      }
>  
>    done:
> -    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(v), Py_SIZE(v));
> -    Py_DECREF(v);
> -    return result;
> +    return v;
>  #undef STORECHAR
>  }
>  
> @@ -3120,7 +3115,7 @@
>  PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
>  					Py_ssize_t size)
>  {
> -    PyObject *repr, *result;
> +    PyObject *repr;
>      char *p;
>  
>  #ifdef Py_UNICODE_WIDE
> @@ -3147,17 +3142,20 @@
>         escape.
>      */
>  
> +    if (size == 0)
> +        return PyBytes_FromStringAndSize(NULL, 0);
> +
>      if (size > (PY_SSIZE_T_MAX - 2 - 1) / expandsize)
>  	return PyErr_NoMemory();
>  
> -    repr = PyByteArray_FromStringAndSize(NULL,
> +    repr = PyBytes_FromStringAndSize(NULL,
>          2
>          + expandsize*size
>          + 1);
>      if (repr == NULL)
>          return NULL;
>  
> -    p = PyByteArray_AS_STRING(repr);
> +    p = PyBytes_AS_STRING(repr);
>  
>      while (size-- > 0) {
>          Py_UNICODE ch = *s++;
> @@ -3249,13 +3247,13 @@
>              *p++ = (char) ch;
>      }
>  
> -    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr),
> -                                        p - PyByteArray_AS_STRING(repr));
> -    Py_DECREF(repr);
> -    return result;
> +    assert(p - PyBytes_AS_STRING(repr) > 0);
> +    if (_PyBytes_Resize(&repr, p - PyBytes_AS_STRING(repr)) < 0)
> +        return NULL;
> +    return repr;
>  }
>  
> -PyObject *PyUnicode_AsUnicodeEscapeString(PyObject *unicode)
> +PyObject *PyUnicodeAsUnicodeEscapeString(PyObject *unicode)
>  {
>      PyObject *s;
>      if (!PyUnicode_Check(unicode)) {
> @@ -3389,7 +3387,7 @@
>  PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
>  					   Py_ssize_t size)
>  {
> -    PyObject *repr, *result;
> +    PyObject *repr;
>      char *p;
>      char *q;
>  
> @@ -3402,13 +3400,13 @@
>      if (size > PY_SSIZE_T_MAX / expandsize)
>  	return PyErr_NoMemory();
>      
> -    repr = PyByteArray_FromStringAndSize(NULL, expandsize * size);
> +    repr = PyBytes_FromStringAndSize(NULL, expandsize * size);
>      if (repr == NULL)
>          return NULL;
>      if (size == 0)
> -        goto done;
> +        return repr;
>  
> -    p = q = PyByteArray_AS_STRING(repr);
> +    p = q = PyBytes_AS_STRING(repr);
>      while (size-- > 0) {
>          Py_UNICODE ch = *s++;
>  #ifdef Py_UNICODE_WIDE
> @@ -3468,10 +3466,10 @@
>      }
>      size = p - q;
>  
> -  done:
> -    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(repr), size);
> -    Py_DECREF(repr);
> -    return result;
> +    assert(size > 0);
> +    if (_PyBytes_Resize(&repr, size) < 0)
> +        return NULL;
> +    return repr;
>  }
>  
>  PyObject *PyUnicode_AsRawUnicodeEscapeString(PyObject *unicode)
> @@ -3706,7 +3704,6 @@
>      const char *reason = (limit == 256) ? "ordinal not in range(256)" : "ordinal not in range(128)";
>      PyObject *errorHandler = NULL;
>      PyObject *exc = NULL;
> -    PyObject *result = NULL;
>      /* the following variable is used for caching string comparisons
>       * -1=not initialized, 0=unknown, 1=strict, 2=replace, 3=ignore, 4=xmlcharrefreplace */
>      int known_errorHandler = -1;
> @@ -3715,10 +3712,10 @@
>         replacements, if we need more, we'll resize */
>      if (size == 0)
>          return PyBytes_FromStringAndSize(NULL, 0);
> -    res = PyByteArray_FromStringAndSize(NULL, size);
> +    res = PyBytes_FromStringAndSize(NULL, size);
>      if (res == NULL)
>          return NULL;
> -    str = PyByteArray_AS_STRING(res);
> +    str = PyBytes_AS_STRING(res);
>      ressize = size;
>  
>      while (p<endp) {
> @@ -3768,7 +3765,7 @@
>  		    p = collend;
>  		    break;
>  		case 4: /* xmlcharrefreplace */
> -		    respos = str - PyByteArray_AS_STRING(res);
> +		    respos = str - PyBytes_AS_STRING(res);
>  		    /* determine replacement size (temporarily (mis)uses p) */
>  		    for (p = collstart, repsize = 0; p < collend; ++p) {
>  			if (*p<10)
> @@ -3795,9 +3792,9 @@
>  		    if (requiredsize > ressize) {
>  			if (requiredsize<2*ressize)
>  			    requiredsize = 2*ressize;
> -			if (PyByteArray_Resize(res, requiredsize))
> +			if (_PyBytes_Resize(&res, requiredsize))
>  			    goto onError;
> -			str = PyByteArray_AS_STRING(res) + respos;
> +			str = PyBytes_AS_STRING(res) + respos;
>  			ressize = requiredsize;
>  		    }
>  		    /* generate replacement (temporarily (mis)uses p) */
> @@ -3815,17 +3812,17 @@
>  		    /* need more space? (at least enough for what we
>  		       have+the replacement+the rest of the string, so
>  		       we won't have to check space for encodable characters) */
> -		    respos = str - PyByteArray_AS_STRING(res);
> +		    respos = str - PyBytes_AS_STRING(res);
>  		    repsize = PyUnicode_GET_SIZE(repunicode);
>  		    requiredsize = respos+repsize+(endp-collend);
>  		    if (requiredsize > ressize) {
>  			if (requiredsize<2*ressize)
>  			    requiredsize = 2*ressize;
> -			if (PyByteArray_Resize(res, requiredsize)) {
> +			if (_PyBytes_Resize(&res, requiredsize)) {
>  			    Py_DECREF(repunicode);
>  			    goto onError;
>  			}
> -			str = PyByteArray_AS_STRING(res) + respos;
> +			str = PyBytes_AS_STRING(res) + respos;
>  			ressize = requiredsize;
>  		    }
>  		    /* check if there is anything unencodable in the replacement
> @@ -3845,13 +3842,23 @@
>  	    }
>  	}
>      }
> -    result = PyBytes_FromStringAndSize(PyByteArray_AS_STRING(res),
> -                                        str - PyByteArray_AS_STRING(res));
> +    /* Resize if we allocated to much */
> +    size = str - PyBytes_AS_STRING(res);
> +    if (size < ressize) { /* If this falls res will be NULL */
> +        assert(size > 0);
> +        if (_PyBytes_Resize(&res, size) < 0)
> +            goto onError;
> +    }
> +
> +    Py_XDECREF(errorHandler);
> +    Py_XDECREF(exc);
> +    return res;
> +
>    onError:
> -    Py_DECREF(res);
> +    Py_XDECREF(res);
>      Py_XDECREF(errorHandler);
>      Py_XDECREF(exc);
> -    return result;
> +    return NULL;
>  }
>  
>  PyObject *PyUnicode_EncodeLatin1(const Py_UNICODE *p,
> @@ -4104,7 +4111,7 @@
>      else {
>  	/* Extend string object */
>  	n = PyBytes_Size(*repr);
> -	if (_PyBytes_Resize(repr, n + mbcssize) < 0)
> +	if (_PyBytes_Resize(&repr, n + mbcssize) < 0)
>  	    return -1;
>      }
>  
> @@ -4834,7 +4841,8 @@
>  
>      /* Resize if we allocated to much */
>      if (respos<PyBytes_GET_SIZE(res))
> -	_PyBytes_Resize(&res, respos);
> +        if (_PyBytes_Resize(&res, respos) < 0)
> +            goto onError;
>  
>      Py_XDECREF(exc);
>      Py_XDECREF(errorHandler);
> _______________________________________________
> Python-checkins mailing list
> Python-checkins at python.org
> http://mail.python.org/mailman/listinfo/python-checkins

-- 
Marc-Andre Lemburg
eGenix.com

Professional Python Services directly from the Source  (#1, Dec 27 2008)
>>> Python/Zope Consulting and Support ...        http://www.egenix.com/
>>> mxODBC.Zope.Database.Adapter ...             http://zope.egenix.com/
>>> mxODBC, mxDateTime, mxTextTools ...        http://python.egenix.com/
________________________________________________________________________
2008-12-02: Released mxODBC.Connect 1.0.0      http://python.egenix.com/

::: Try our new mxODBC.Connect Python Database Interface for free ! ::::

   eGenix.com Software, Skills and Services GmbH  Pastor-Loeh-Str.48
    D-40764 Langenfeld, Germany. CEO Dipl.-Math. Marc-Andre Lemburg
           Registered at Amtsgericht Duesseldorf: HRB 46611
               http://www.egenix.com/company/contact/