[Python-checkins] gh-105156: Cleanup usage of old Py_UNICODE type (#105158)

vstinner webhook-mailer at python.org
Thu Jun 1 03:18:16 EDT 2023


https://github.com/python/cpython/commit/7d07e5891d2843f269fac00dc8847abfe3671765
commit: 7d07e5891d2843f269fac00dc8847abfe3671765
branch: main
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2023-06-01T07:18:09Z
summary:

gh-105156: Cleanup usage of old Py_UNICODE type (#105158)

* refcounts.dat:

  * Remove Py_UNICODE functions.
  * Replace Py_UNICODE argument type with Py_UCS4.

* _PyUnicode_ToLowercase(), _PyUnicode_ToUppercase(),
  _PyUnicode_ToTitlecase() are no longer deprecated in comments.
  The deprecation marker is no longer needed since these functions now use
  the Py_UCS4 type, rather than the deprecated Py_UNICODE type.
* gdb: Remove unused char_width() method.

files:
M Doc/data/refcounts.dat
M Include/cpython/unicodeobject.h
M Objects/stringlib/README.txt
M Tools/gdb/libpython.py

diff --git a/Doc/data/refcounts.dat b/Doc/data/refcounts.dat
index ee64ffdc91662..ef37b83487809 100644
--- a/Doc/data/refcounts.dat
+++ b/Doc/data/refcounts.dat
@@ -2374,76 +2374,56 @@ PyUnicode_KIND:PyObject*:o:0:
 PyUnicode_MAX_CHAR_VALUE::::
 PyUnicode_MAX_CHAR_VALUE:PyObject*:o:0:
 
-PyUnicode_AS_UNICODE:Py_UNICODE*:::
-PyUnicode_AS_UNICODE:PyObject*:o:0:
-
-PyUnicode_AS_DATA:const char*:::
-PyUnicode_AS_DATA:PyObject*:o:0:
-
 Py_UNICODE_ISALNUM:int:::
-Py_UNICODE_ISALNUM:Py_UNICODE:ch::
+Py_UNICODE_ISALNUM:Py_UCS4:ch::
 
 Py_UNICODE_ISALPHA:int:::
-Py_UNICODE_ISALPHA:Py_UNICODE:ch::
+Py_UNICODE_ISALPHA:Py_UCS4:ch::
 
 Py_UNICODE_ISSPACE:int:::
-Py_UNICODE_ISSPACE:Py_UNICODE:ch::
+Py_UNICODE_ISSPACE:Py_UCS4:ch::
 
 Py_UNICODE_ISLOWER:int:::
-Py_UNICODE_ISLOWER:Py_UNICODE:ch::
+Py_UNICODE_ISLOWER:Py_UCS4:ch::
 
 Py_UNICODE_ISUPPER:int:::
-Py_UNICODE_ISUPPER:Py_UNICODE:ch::
+Py_UNICODE_ISUPPER:Py_UCS4:ch::
 
 Py_UNICODE_ISTITLE:int:::
-Py_UNICODE_ISTITLE:Py_UNICODE:ch::
+Py_UNICODE_ISTITLE:Py_UCS4:ch::
 
 Py_UNICODE_ISLINEBREAK:int:::
-Py_UNICODE_ISLINEBREAK:Py_UNICODE:ch::
+Py_UNICODE_ISLINEBREAK:Py_UCS4:ch::
 
 Py_UNICODE_ISDECIMAL:int:::
-Py_UNICODE_ISDECIMAL:Py_UNICODE:ch::
+Py_UNICODE_ISDECIMAL:Py_UCS4:ch::
 
 Py_UNICODE_ISDIGIT:int:::
-Py_UNICODE_ISDIGIT:Py_UNICODE:ch::
+Py_UNICODE_ISDIGIT:Py_UCS4:ch::
 
 Py_UNICODE_ISNUMERIC:int:::
-Py_UNICODE_ISNUMERIC:Py_UNICODE:ch::
+Py_UNICODE_ISNUMERIC:Py_UCS4:ch::
 
 Py_UNICODE_ISPRINTABLE:int:::
-Py_UNICODE_ISPRINTABLE:Py_UNICODE:ch::
+Py_UNICODE_ISPRINTABLE:Py_UCS4:ch::
 
-Py_UNICODE_TOLOWER:Py_UNICODE:::
-Py_UNICODE_TOLOWER:Py_UNICODE:ch::
+Py_UNICODE_TOLOWER:Py_UCS4:::
+Py_UNICODE_TOLOWER:Py_UCS4:ch::
 
-Py_UNICODE_TOUPPER:Py_UNICODE:::
-Py_UNICODE_TOUPPER:Py_UNICODE:ch::
+Py_UNICODE_TOUPPER:Py_UCS4:::
+Py_UNICODE_TOUPPER:Py_UCS4:ch::
 
-Py_UNICODE_TOTITLE:Py_UNICODE:::
-Py_UNICODE_TOTITLE:Py_UNICODE:ch::
+Py_UNICODE_TOTITLE:Py_UCS4:::
+Py_UNICODE_TOTITLE:Py_UCS4:ch::
 
 Py_UNICODE_TODECIMAL:int:::
-Py_UNICODE_TODECIMAL:Py_UNICODE:ch::
+Py_UNICODE_TODECIMAL:Py_UCS4:ch::
 
 Py_UNICODE_TODIGIT:int:::
-Py_UNICODE_TODIGIT:Py_UNICODE:ch::
+Py_UNICODE_TODIGIT:Py_UCS4:ch::
 
 Py_UNICODE_TONUMERIC:double:::
-Py_UNICODE_TONUMERIC:Py_UNICODE:ch::
-
-PyUnicode_FromUnicode:PyObject*::+1:
-PyUnicode_FromUnicode:const Py_UNICODE*:u::
-PyUnicode_FromUnicode:Py_ssize_t:size::
-
-PyUnicode_AsUnicode:Py_UNICODE*:::
-PyUnicode_AsUnicode:PyObject*:unicode:0:
-
-PyUnicode_AsUnicodeAndSize:Py_UNICODE*:::
-PyUnicode_AsUnicodeAndSize:PyObject*:unicode:0:
-PyUnicode_AsUnicodeAndSize:Py_ssize_t*:size::
-
-PyUnicode_GetSize:Py_ssize_t:::
-PyUnicode_GetSize:PyObject*:unicode:0:
+Py_UNICODE_TONUMERIC:Py_UCS4:ch::
 
 PyUnicode_FromObject:PyObject*::+1:
 PyUnicode_FromObject:PyObject*:obj:0:
diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index 92e7afde427bd..dee8b27d3d97d 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -379,8 +379,6 @@ static inline Py_UCS4 PyUnicode_MAX_CHAR_VALUE(PyObject *op)
 
 /* === Public API ========================================================= */
 
-/* --- Plain Py_UNICODE --------------------------------------------------- */
-
 /* With PEP 393, this is the recommended way to allocate a new unicode object.
    This function will allocate the object and its buffer in a single memory
    block.  Objects created using this function are not resizable. */
@@ -827,15 +825,15 @@ PyAPI_FUNC(int) _PyUnicode_IsLinebreak(
     const Py_UCS4 ch         /* Unicode character */
     );
 
-/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToLowercase(
     Py_UCS4 ch       /* Unicode character */
     );
 
-/* Py_DEPRECATED(3.3) */ PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToUppercase(
     Py_UCS4 ch       /* Unicode character */
     );
 
-Py_DEPRECATED(3.3) PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
+PyAPI_FUNC(Py_UCS4) _PyUnicode_ToTitlecase(
     Py_UCS4 ch       /* Unicode character */
     );
 
diff --git a/Objects/stringlib/README.txt b/Objects/stringlib/README.txt
index e1e329290acbb..26f3d02b0eff3 100644
--- a/Objects/stringlib/README.txt
+++ b/Objects/stringlib/README.txt
@@ -9,7 +9,7 @@ the following defines used by the different modules:
 
 STRINGLIB_CHAR
 
-    the type used to hold a character (char or Py_UNICODE)
+    the type used to hold a character (char, Py_UCS1, Py_UCS2 or Py_UCS4)
 
 STRINGLIB_GET_EMPTY()
 
diff --git a/Tools/gdb/libpython.py b/Tools/gdb/libpython.py
index e38bd59e20a30..79b8c7527c230 100755
--- a/Tools/gdb/libpython.py
+++ b/Tools/gdb/libpython.py
@@ -1390,10 +1390,6 @@ def _unichr_is_printable(char):
 class PyUnicodeObjectPtr(PyObjectPtr):
     _typename = 'PyUnicodeObject'
 
-    def char_width(self):
-        _type_Py_UNICODE = gdb.lookup_type('Py_UNICODE')
-        return _type_Py_UNICODE.sizeof
-
     def proxyval(self, visited):
         compact = self.field('_base')
         ascii = compact['_base']
@@ -1414,13 +1410,13 @@ def proxyval(self, visited):
         elif repr_kind == 4:
             field_str = field_str.cast(_type_unsigned_int_ptr())
 
-        # Gather a list of ints from the Py_UNICODE array; these are either
+        # Gather a list of ints from the code point array; these are either
         # UCS-1, UCS-2 or UCS-4 code points:
-        Py_UNICODEs = [int(field_str[i]) for i in safe_range(field_length)]
+        code_points = [int(field_str[i]) for i in safe_range(field_length)]
 
         # Convert the int code points to unicode characters, and generate a
         # local unicode instance.
-        result = u''.join(map(chr, Py_UNICODEs))
+        result = u''.join(map(chr, code_points))
         return result
 
     def write_repr(self, out, visited):



More information about the Python-checkins mailing list