[Patches] Unicode Patch Set 2000-04-27

M.-A. Lemburg mal@lemburg.com
Thu, 27 Apr 2000 18:17:34 +0200


This is a multi-part message in MIME format.
--------------473E418C86175FAFC293B440
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

Patch Set Contents:
-------------------

Modules/_tkinter.c:

Fixes a memory leak found by Fredrik Lundh.

Objects/funcobject.c:

Doc strings can now be given as Unicode strings.

Objects/unicodeobject.c:

Fixed a reference leak in the allocator.

Renamed utf8_string() to _PyUnicode_AsUTF8String() and made it
non-static so that other parts of the interpreter can use it.

Python/getargs.c:

Fixed a memory leak found by Fredrik Lundh. Instead
of PyUnicode_AsUTF8String() we now use _PyUnicode_AsUTF8String(),
which returns the string object without incrementing its refcount
(and ensures that the object obtained this way remains alive until
the Unicode object is garbage collected).

-- 
Marc-Andre Lemburg
______________________________________________________________________
Business:                                      http://www.lemburg.com/
Python Pages:                           http://www.lemburg.com/python/
--------------473E418C86175FAFC293B440
Content-Type: text/plain; charset=us-ascii;
 name="Unicode-Implementation-2000-04-27.patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline;
 filename="Unicode-Implementation-2000-04-27.patch"

Only in CVS-Python/Lib/test/output: test_winsound
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x PC -x PCbuild -x *.py -x ACKS -x *.txt -x README CVS-Python/Modules/_tkinter.c Python+Unicode/Modules/_tkinter.c
--- CVS-Python/Modules/_tkinter.c	Fri Mar 31 05:29:39 2000
+++ Python+Unicode/Modules/_tkinter.c	Thu Apr 27 17:37:24 2000
@@ -553,8 +553,10 @@
 		PyObject* utf8 = PyUnicode_AsUTF8String (value);
 		if (!utf8)
 			return 0;
-		return Tcl_NewStringObj (PyString_AS_STRING (utf8),
+		result = Tcl_NewStringObj (PyString_AS_STRING (utf8),
 					 PyString_GET_SIZE (utf8));
+		Py_DECREF(utf8);
+		return result;
 	}
 	else {
 		PyObject *v = PyObject_Str(value);
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x PC -x PCbuild -x *.py -x ACKS -x *.txt -x README CVS-Python/Objects/funcobject.c Python+Unicode/Objects/funcobject.c
--- CVS-Python/Objects/funcobject.c	Fri May 22 02:55:34 1998
+++ Python+Unicode/Objects/funcobject.c	Thu Apr 13 18:07:29 2000
@@ -55,7 +55,7 @@
 		consts = ((PyCodeObject *)code)->co_consts;
 		if (PyTuple_Size(consts) >= 1) {
 			doc = PyTuple_GetItem(consts, 0);
-			if (!PyString_Check(doc))
+			if (!PyString_Check(doc) && !PyUnicode_Check(doc))
 				doc = Py_None;
 		}
 		else
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x PC -x PCbuild -x *.py -x ACKS -x *.txt -x README CVS-Python/Objects/unicodeobject.c Python+Unicode/Objects/unicodeobject.c
--- CVS-Python/Objects/unicodeobject.c	Thu Apr 13 11:11:39 2000
+++ Python+Unicode/Objects/unicodeobject.c	Thu Apr 27 17:51:23 2000
@@ -208,8 +208,7 @@
 	    if ((unicode->length < length) &&
 		_PyUnicode_Resize(unicode, length)) {
 		free(unicode->str);
-		PyMem_DEL(unicode);
-		return NULL;
+		goto onError;
 	    }
 	}
 	else
@@ -222,8 +221,10 @@
 	unicode->str = PyMem_NEW(Py_UNICODE, length + 1);
     }
 
-    if (!unicode->str) 
+    if (!unicode->str) {
+	PyErr_NoMemory();
 	goto onError;
+    }
     unicode->str[length] = 0;
     unicode->length = length;
     unicode->hash = -1;
@@ -233,7 +234,6 @@
  onError:
     _Py_ForgetReference((PyObject *)unicode);
     PyMem_DEL(unicode);
-    PyErr_NoMemory();
     return NULL;
 }
 
@@ -707,25 +707,27 @@
 
    The resulting string is cached in the Unicode object for subsequent
    usage by this function. The cached version is needed to implement
-   the character buffer interface.
+   the character buffer interface and will live (at least) as long as
+   the Unicode object itself.
 
    The refcount of the string is *not* incremented.
 
+   *** Exported for internal use by the interpreter only !!! ***
+
 */
 
-static
-PyObject *utf8_string(PyUnicodeObject *self,
+PyObject *_PyUnicode_AsUTF8String(PyObject *unicode,
 		      const char *errors)
 {
-    PyObject *v = self->utf8str;
+    PyObject *v = ((PyUnicodeObject *)unicode)->utf8str;
 
     if (v)
         return v;
-    v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(self),
-			     PyUnicode_GET_SIZE(self),
+    v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+			     PyUnicode_GET_SIZE(unicode),
 			     errors);
     if (v && errors == NULL)
-        self->utf8str = v;
+        ((PyUnicodeObject *)unicode)->utf8str = v;
     return v;
 }
 
@@ -737,7 +739,7 @@
         PyErr_BadArgument();
         return NULL;
     }
-    str = utf8_string((PyUnicodeObject *)unicode, NULL);
+    str = _PyUnicode_AsUTF8String(unicode, NULL);
     if (str == NULL)
         return NULL;
     Py_INCREF(str);
@@ -3183,7 +3185,7 @@
        on. */
     if (self->hash != -1)
 	return self->hash;
-    utf8 = utf8_string(self, NULL);
+    utf8 = _PyUnicode_AsUTF8String((PyObject *)self, NULL);
     if (utf8 == NULL)
 	return -1;
     hash = PyObject_Hash(utf8);
@@ -4087,7 +4089,7 @@
 			"accessing non-existent unicode segment");
         return -1;
     }
-    str = utf8_string(self, NULL);
+    str = _PyUnicode_AsUTF8String((PyObject *)self, NULL);
     if (str == NULL)
 	return -1;
     *ptr = (void *) PyString_AS_STRING(str);
diff -u -rbP -x *.o -x *.pyc -x Makefile -x *~ -x *.so -x add2lib -x pgen -x buildno -x config.* -x libpython* -x python -x Setup -x Setup.local -x Setup.thread -x hassignal -x Makefile.pre -x configure -x *.bak -x *.s -x DEADJOE -x *.rej -x *.orig -x Demo -x CVS -x Doc -x *.orig -x .#* -x distutils -x PC -x PCbuild -x *.py -x ACKS -x *.txt -x README CVS-Python/Python/getargs.c Python+Unicode/Python/getargs.c
--- CVS-Python/Python/getargs.c	Tue Mar 28 22:29:59 2000
+++ Python+Unicode/Python/getargs.c	Thu Apr 27 17:51:08 2000
@@ -444,6 +444,11 @@
 }
 
 
+/* Internal API needed by convertsimple1(): */
+extern 
+PyObject *_PyUnicode_AsUTF8String(PyObject *unicode,
+				  const char *errors);
+
 /* Convert a non-tuple argument.  Return NULL if conversion went OK,
    or a string representing the expected type if the conversion failed.
    When failing, an exception may or may not have been raised.
@@ -589,7 +594,7 @@
 			        if (PyString_Check(arg))
 				    *p = PyString_AS_STRING(arg);
 				else if (PyUnicode_Check(arg)) {
-				    arg = PyUnicode_AsUTF8String(arg);
+				    arg = _PyUnicode_AsUTF8String(arg, NULL);
 				    if (arg == NULL)
 					return "unicode conversion error";
 				    *p = PyString_AS_STRING(arg);
@@ -634,7 +639,7 @@
 				else if (PyString_Check(arg))
 				  *p = PyString_AsString(arg);
 				else if (PyUnicode_Check(arg)) {
-				  arg = PyUnicode_AsUTF8String(arg);
+				  arg = _PyUnicode_AsUTF8String(arg, NULL);
 				  if (arg == NULL)
 				      return "unicode conversion error";
 				  *p = PyString_AS_STRING(arg);
Only in CVS-Python/Lib/test: test_winsound.py

--------------473E418C86175FAFC293B440--