[Python-checkins] CVS: python/dist/src/Objects unicodeobject.c,2.54,2.55

M.-A. Lemburg python-dev@python.org
Thu, 3 Aug 2000 11:44:31 -0700


Update of /cvsroot/python/python/dist/src/Objects
In directory slayer.i.sourceforge.net:/tmp/cvs-serv8693/Objects

Modified Files:
	unicodeobject.c 
Log Message:
This patch finalizes the move from UTF-8 to a default encoding in
the Python Unicode implementation.

The internal buffer used for implementing the buffer protocol
is renamed from utf8str to defenc to make this change visible. It now
holds the default encoded version of the Unicode object and is
calculated on demand (NULL otherwise).

Since the default encoding defaults to ASCII, this means that
Unicode objects that hold non-ASCII characters will no longer
work with C APIs using the "s" or "t" parser markers. C APIs must now
explicitly provide Unicode support via the "u", "U" or "es"/"es#"
parser markers in order to work with non-ASCII Unicode strings.

(Note: this patch will also have to be applied to the 1.6 branch
 of the CVS tree.)

Index: unicodeobject.c
===================================================================
RCS file: /cvsroot/python/python/dist/src/Objects/unicodeobject.c,v
retrieving revision 2.54
retrieving revision 2.55
diff -C2 -r2.54 -r2.55
*** unicodeobject.c	2000/08/03 16:24:25	2.54
--- unicodeobject.c	2000/08/03 18:44:28	2.55
***************
*** 166,172 ****
   reset:
      /* Reset the object caches */
!     if (unicode->utf8str) {
!         Py_DECREF(unicode->utf8str);
!         unicode->utf8str = NULL;
      }
      unicode->hash = -1;
--- 166,172 ----
   reset:
      /* Reset the object caches */
!     if (unicode->defenc) {
!         Py_DECREF(unicode->defenc);
!         unicode->defenc = NULL;
      }
      unicode->hash = -1;
***************
*** 244,248 ****
      unicode->length = length;
      unicode->hash = -1;
!     unicode->utf8str = NULL;
      return unicode;
  
--- 244,248 ----
      unicode->length = length;
      unicode->hash = -1;
!     unicode->defenc = NULL;
      return unicode;
  
***************
*** 263,269 ****
  	    unicode->length = 0;
  	}
! 	if (unicode->utf8str) {
! 	    Py_DECREF(unicode->utf8str);
! 	    unicode->utf8str = NULL;
  	}
  	/* Add to free list */
--- 263,269 ----
  	    unicode->length = 0;
  	}
! 	if (unicode->defenc) {
! 	    Py_DECREF(unicode->defenc);
! 	    unicode->defenc = NULL;
  	}
  	/* Add to free list */
***************
*** 274,278 ****
      else {
  	PyMem_DEL(unicode->str);
! 	Py_XDECREF(unicode->utf8str);
  	PyObject_DEL(unicode);
      }
--- 274,278 ----
      else {
  	PyMem_DEL(unicode->str);
! 	Py_XDECREF(unicode->defenc);
  	PyObject_DEL(unicode);
      }
***************
*** 530,533 ****
--- 530,560 ----
  }
  
+ /* Return a Python string holding the default encoded value of the
+    Unicode object. 
+ 
+    The resulting string is cached in the Unicode object for subsequent
+    usage by this function. The cached version is needed to implement
+    the character buffer interface and will live (at least) as long as
+    the Unicode object itself.
+ 
+    The refcount of the string is *not* incremented.
+ 
+    *** Exported for internal use by the interpreter only !!! ***
+ 
+ */
+ 
+ PyObject *_PyUnicode_AsDefaultEncodedString(PyObject *unicode,
+ 					    const char *errors)
+ {
+     PyObject *v = ((PyUnicodeObject *)unicode)->defenc;
+ 
+     if (v)
+         return v;
+     v = PyUnicode_AsEncodedString(unicode, NULL, errors);
+     if (v && errors == NULL)
+         ((PyUnicodeObject *)unicode)->defenc = v;
+     return v;
+ }
+ 
  Py_UNICODE *PyUnicode_AsUnicode(PyObject *unicode)
  {
***************
*** 875,907 ****
  }
  
- /* Return a Python string holding the UTF-8 encoded value of the
-    Unicode object. 
- 
-    The resulting string is cached in the Unicode object for subsequent
-    usage by this function. The cached version is needed to implement
-    the character buffer interface and will live (at least) as long as
-    the Unicode object itself.
- 
-    The refcount of the string is *not* incremented.
- 
-    *** Exported for internal use by the interpreter only !!! ***
- 
- */
- 
- PyObject *_PyUnicode_AsUTF8String(PyObject *unicode,
- 				  const char *errors)
- {
-     PyObject *v = ((PyUnicodeObject *)unicode)->utf8str;
- 
-     if (v)
-         return v;
-     v = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
- 			     PyUnicode_GET_SIZE(unicode),
- 			     errors);
-     if (v && errors == NULL)
-         ((PyUnicodeObject *)unicode)->utf8str = v;
-     return v;
- }
- 
  PyObject *PyUnicode_AsUTF8String(PyObject *unicode)
  {
--- 902,905 ----
***************
*** 912,916 ****
          return NULL;
      }
!     str = _PyUnicode_AsUTF8String(unicode, NULL);
      if (str == NULL)
          return NULL;
--- 910,916 ----
          return NULL;
      }
!     str = PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
! 			       PyUnicode_GET_SIZE(unicode),
! 			       NULL);
      if (str == NULL)
          return NULL;
***************
*** 4520,4524 ****
          return -1;
      }
!     str = _PyUnicode_AsUTF8String((PyObject *)self, NULL);
      if (str == NULL)
  	return -1;
--- 4520,4524 ----
          return -1;
      }
!     str = _PyUnicode_AsDefaultEncodedString((PyObject *)self, NULL);
      if (str == NULL)
  	return -1;
***************
*** 5131,5135 ****
  	if (v->str)
  	    PyMem_DEL(v->str);
! 	Py_XDECREF(v->utf8str);
  	PyObject_DEL(v);
      }
--- 5131,5135 ----
  	if (v->str)
  	    PyMem_DEL(v->str);
! 	Py_XDECREF(v->defenc);
  	PyObject_DEL(v);
      }