[Python-checkins] cpython: Improve string forms and PyUnicode_Resize() documentation

victor.stinner python-checkins at python.org
Mon Oct 3 23:35:47 CEST 2011


http://hg.python.org/cpython/rev/fe10f0bcc860
changeset:   72621:fe10f0bcc860
user:        Victor Stinner <victor.stinner at haypocalc.com>
date:        Mon Oct 03 23:19:21 2011 +0200
summary:
  Improve string forms and PyUnicode_Resize() documentation

Also remove the FIXME for resize_copy(): as discussed with Martin, copying the
string on resize when the string is not resizable is just fine.

files:
  Include/unicodeobject.h |  35 ++++++++++++++++++----------
  Objects/unicodeobject.c |   4 +-
  2 files changed, 24 insertions(+), 15 deletions(-)


diff --git a/Include/unicodeobject.h b/Include/unicodeobject.h
--- a/Include/unicodeobject.h
+++ b/Include/unicodeobject.h
@@ -206,7 +206,7 @@
    immediately follow the structure. utf8_length and wstr_length can be found
    in the length field; the utf8 pointer is equal to the data pointer. */
 typedef struct {
-    /* Unicode strings can be in 4 states:
+    /* There are 4 forms of Unicode strings:
 
        - compact ascii:
 
@@ -227,7 +227,7 @@
          * ascii = 0
          * utf8 != data
 
-       - string created by the legacy API (not ready):
+       - legacy string, not ready:
 
          * structure = PyUnicodeObject
          * kind = PyUnicode_WCHAR_KIND
@@ -239,7 +239,7 @@
          * interned = SSTATE_NOT_INTERNED
          * ascii = 0
 
-       - string created by the legacy API, ready:
+       - legacy string, ready:
 
          * structure = PyUnicodeObject structure
          * kind = PyUnicode_1BYTE_KIND, PyUnicode_2BYTE_KIND or
@@ -249,10 +249,16 @@
          * data.any is not NULL
          * utf8 = data if ascii is 1
 
-       String created by the legacy API becomes ready when calling
-       PyUnicode_READY().
+       Compact strings use only one memory block (structure + characters),
+       whereas legacy strings use one block for the structure and one block
+       for characters.
 
-       See also _PyUnicode_CheckConsistency(). */
+       Legacy strings are created by PyUnicode_FromUnicode() and
+       PyUnicode_FromStringAndSize(NULL, size) functions. They become ready
+       when PyUnicode_READY() is called.
+
+       See also _PyUnicode_CheckConsistency().
+    */
     PyObject_HEAD
     Py_ssize_t length;          /* Number of code points in the string */
     Py_hash_t hash;             /* Hash value; -1 if not set */
@@ -721,19 +727,22 @@
 PyAPI_FUNC(Py_UNICODE) PyUnicode_GetMax(void);
 #endif
 
-/* Resize an already allocated Unicode object to the new size length.
+/* Resize a Unicode object allocated by the legacy API (e.g.
+   PyUnicode_FromUnicode). Unicode objects allocated by the new API (e.g.
+   PyUnicode_New) cannot be resized by this function.
+
+   The length is a number of Py_UNICODE characters (and not the number of code
+   points).
 
    *unicode is modified to point to the new (resized) object and 0
    returned on success.
 
-   This API may only be called by the function which also called the
-   Unicode constructor. The refcount on the object must be 1. Otherwise,
-   an error is returned.
+   If the refcount on the object is 1, the function resizes the string in
+   place, which is usually faster than allocating a new string (and copying
+   characters).
 
    Error handling is implemented as follows: an exception is set, -1
-   is returned and *unicode left untouched.
-
-*/
+   is returned and *unicode left untouched. */
 
 PyAPI_FUNC(int) PyUnicode_Resize(
     PyObject **unicode,         /* Pointer to the Unicode object */
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -536,7 +536,8 @@
             return NULL;
         }
         return copy;
-    } else {
+    }
+    else {
         PyUnicodeObject *w;
         assert(_PyUnicode_WSTR(unicode) != NULL);
         assert(_PyUnicode_DATA_ANY(unicode) == NULL);
@@ -1294,7 +1295,6 @@
     if (old_length == length)
         return 0;
 
-    /* FIXME: really create a new object? */
     if (!unicode_resizable(unicode)) {
         PyObject *copy = resize_copy(unicode, length);
         if (copy == NULL)

-- 
Repository URL: http://hg.python.org/cpython


More information about the Python-checkins mailing list