[Python-Dev] "data".decode(encoding) ?!

M.-A. Lemburg mal@lemburg.com
Wed, 02 May 2001 21:55:25 +0200


This is a multi-part message in MIME format.
--------------891C60CC0A920DAE275D45C5
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Guido van Rossum wrote:
> 
> > > Can you provide examples of where this can't be done using the
> > > existing approach?
> >
> > There is no existing elegant approach except hooking up to the
> > codecs directly. Adding .decode() is really a matter of adding
> > symmetry.
> 
> Yes, but symmetry is good except when it isn't. :-)
> 
> > Here are some examples of how these two codec methods could
> > be used:
> >
> >       xmltext = binarydata.encode('base64')
> >       ...
> >       binarydata = xmltext.decode('base64')
> >
> >       zzz = data.encode('gzip')
> >       ...
> >       data = zzz.decode('gzip')
> >
> >       jpegimage = gifimage.decode('gif').encode('jpeg')
> >
> >       mp3audio = wavaudio.decode('wav').encode('mp3')
> >
> >       etc.
> 
> How would you do this currently?

By looking up the codecs using the codec registry and
then calling them directly.
 
> > Basically all content transfer encodings can take advantage of
> > these two methods.
> >
> > It's not really code bloat, BTW, since the C API is there;
> > the .decode() method would just expose it.
> 
> Show me the patch and I'll decide whether it's code bloat. :-)

I've attached the patch. Due to a small reorganisation the
patch is a little longer -- symmetry has its price at C level
too ;-)

-- 
Marc-Andre Lemburg
______________________________________________________________________
Company & Consulting:                           http://www.egenix.com/
Python Software:                        http://www.lemburg.com/python/
--------------891C60CC0A920DAE275D45C5
Content-Type: text/plain; charset=us-ascii;
 name="string.decode.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="string.decode.patch"

--- CVS-Python/Include/stringobject.h	Sat Feb 24 10:30:49 2001
+++ Dev-Python/Include/stringobject.h	Wed May  2 21:05:12 2001
@@ -105,10 +105,19 @@ extern DL_IMPORT(PyObject*) PyString_AsE
     PyObject *str,	 	/* string object */
     const char *encoding,	/* encoding */
     const char *errors		/* error handling */
     );
 
+/* Decodes a string object and returns the result as a Python string
+   object. */
+
+extern DL_IMPORT(PyObject*) PyString_AsDecodedString(
+    PyObject *str,	 	/* string object */
+    const char *encoding,	/* encoding */
+    const char *errors		/* error handling */
+    );
+
 /* Provides access to the internal data buffer and size of a string
    object or the default encoded version of an Unicode object. Passing
    NULL as *len parameter will force the string buffer to be
    0-terminated (passing a string with embedded NULL characters will
    cause an exception).  */
--- CVS-Python/Objects/stringobject.c	Wed May  2 16:19:22 2001
+++ Dev-Python/Objects/stringobject.c	Wed May  2 21:04:34 2001
@@ -138,42 +138,56 @@ PyString_FromString(const char *str)
 PyObject *PyString_Decode(const char *s,
 			  int size,
 			  const char *encoding,
 			  const char *errors)
 {
-    PyObject *buffer = NULL, *str;
+    PyObject *v, *str;
+
+    str = PyString_FromStringAndSize(s, size);
+    if (str == NULL)
+	return NULL;
+    v = PyString_AsDecodedString(str, encoding, errors);
+    Py_DECREF(str);
+    return v;
+}
+
+PyObject *PyString_AsDecodedString(PyObject *str,
+				   const char *encoding,
+				   const char *errors)
+{
+    PyObject *v;
+
+    if (!PyString_Check(str)) {
+        PyErr_BadArgument();
+        goto onError;
+    }
 
     if (encoding == NULL)
 	encoding = PyUnicode_GetDefaultEncoding();
 
     /* Decode via the codec registry */
-    buffer = PyBuffer_FromMemory((void *)s, size);
-    if (buffer == NULL)
-        goto onError;
-    str = PyCodec_Decode(buffer, encoding, errors);
-    if (str == NULL)
+    v = PyCodec_Decode(str, encoding, errors);
+    if (v == NULL)
         goto onError;
     /* Convert Unicode to a string using the default encoding */
-    if (PyUnicode_Check(str)) {
-	PyObject *temp = str;
-	str = PyUnicode_AsEncodedString(str, NULL, NULL);
+    if (PyUnicode_Check(v)) {
+	PyObject *temp = v;
+	v = PyUnicode_AsEncodedString(v, NULL, NULL);
 	Py_DECREF(temp);
-	if (str == NULL)
+	if (v == NULL)
 	    goto onError;
     }
-    if (!PyString_Check(str)) {
+    if (!PyString_Check(v)) {
         PyErr_Format(PyExc_TypeError,
                      "decoder did not return a string object (type=%.400s)",
-                     str->ob_type->tp_name);
-        Py_DECREF(str);
+                     v->ob_type->tp_name);
+        Py_DECREF(v);
         goto onError;
     }
-    Py_DECREF(buffer);
-    return str;
+    return v;
 
  onError:
-    Py_XDECREF(buffer);
     return NULL;
 }
 
 PyObject *PyString_Encode(const char *s,
 			  int size,
@@ -1773,10 +1780,29 @@ string_encode(PyStringObject *self, PyOb
         return NULL;
     return PyString_AsEncodedString((PyObject *)self, encoding, errors);
 }
 
 
+static char decode__doc__[] =
+"S.decode([encoding[,errors]]) -> string\n\
+\n\
+Return a decoded string version of S. Default encoding is the current\n\
+default string encoding. errors may be given to set a different error\n\
+handling scheme. Default is 'strict' meaning that encoding errors raise\n\
+a ValueError. Other possible values are 'ignore' and 'replace'.";
+
+static PyObject *
+string_decode(PyStringObject *self, PyObject *args)
+{
+    char *encoding = NULL;
+    char *errors = NULL;
+    if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
+        return NULL;
+    return PyString_AsDecodedString((PyObject *)self, encoding, errors);
+}
+
+
 static char expandtabs__doc__[] =
 "S.expandtabs([tabsize]) -> string\n\
 \n\
 Return a copy of S where all tab characters are expanded using spaces.\n\
 If tabsize is not given, a tab size of 8 characters is assumed.";
@@ -2347,10 +2373,11 @@ string_methods[] = {
 	{"title",       (PyCFunction)string_title,       1, title__doc__},
 	{"ljust",       (PyCFunction)string_ljust,       1, ljust__doc__},
 	{"rjust",       (PyCFunction)string_rjust,       1, rjust__doc__},
 	{"center",      (PyCFunction)string_center,      1, center__doc__},
 	{"encode",      (PyCFunction)string_encode,      1, encode__doc__},
+	{"decode",      (PyCFunction)string_decode,      1, decode__doc__},
 	{"expandtabs",  (PyCFunction)string_expandtabs,  1, expandtabs__doc__},
 	{"splitlines",  (PyCFunction)string_splitlines,  1, splitlines__doc__},
 #if 0
 	{"zfill",       (PyCFunction)string_zfill,       1, zfill__doc__},
 #endif

--------------891C60CC0A920DAE275D45C5--