[Python-checkins] gh-106320: Move _PyUnicodeWriter to the internal C API (#106342)

vstinner webhook-mailer at python.org
Mon Jul 3 04:23:47 EDT 2023


https://github.com/python/cpython/commit/5ccbbe5bb9a659fa8f2fe551428c84cc14015f44
commit: 5ccbbe5bb9a659fa8f2fe551428c84cc14015f44
branch: main
author: Victor Stinner <vstinner at python.org>
committer: vstinner <vstinner at python.org>
date: 2023-07-03T10:23:43+02:00
summary:

gh-106320: Move _PyUnicodeWriter to the internal C API (#106342)

Move also _PyUnicode_FormatAdvancedWriter().

CJK codecs and multibytecodec.c now define the Py_BUILD_CORE_MODULE
macro.

files:
M Include/cpython/unicodeobject.h
M Include/internal/pycore_complexobject.h
M Include/internal/pycore_floatobject.h
M Include/internal/pycore_unicodeobject.h
M Modules/cjkcodecs/cjkcodecs.h
M Modules/cjkcodecs/multibytecodec.c
M Modules/cjkcodecs/multibytecodec.h
M Tools/c-analyzer/c_parser/preprocessor/gcc.py

diff --git a/Include/cpython/unicodeobject.h b/Include/cpython/unicodeobject.h
index dee8b27d3d97d..c5892a80d2c54 100644
--- a/Include/cpython/unicodeobject.h
+++ b/Include/cpython/unicodeobject.h
@@ -480,131 +480,6 @@ PyAPI_FUNC(Py_UCS4) _PyUnicode_FindMaxChar (
     Py_ssize_t start,
     Py_ssize_t end);
 
-/* --- _PyUnicodeWriter API ----------------------------------------------- */
-
-typedef struct {
-    PyObject *buffer;
-    void *data;
-    int kind;
-    Py_UCS4 maxchar;
-    Py_ssize_t size;
-    Py_ssize_t pos;
-
-    /* minimum number of allocated characters (default: 0) */
-    Py_ssize_t min_length;
-
-    /* minimum character (default: 127, ASCII) */
-    Py_UCS4 min_char;
-
-    /* If non-zero, overallocate the buffer (default: 0). */
-    unsigned char overallocate;
-
-    /* If readonly is 1, buffer is a shared string (cannot be modified)
-       and size is set to 0. */
-    unsigned char readonly;
-} _PyUnicodeWriter ;
-
-/* Initialize a Unicode writer.
- *
- * By default, the minimum buffer size is 0 character and overallocation is
- * disabled. Set min_length, min_char and overallocate attributes to control
- * the allocation of the buffer. */
-PyAPI_FUNC(void)
-_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
-
-/* Prepare the buffer to write 'length' characters
-   with the specified maximum character.
-
-   Return 0 on success, raise an exception and return -1 on error. */
-#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
-    (((MAXCHAR) <= (WRITER)->maxchar                                  \
-      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
-     ? 0                                                              \
-     : (((LENGTH) == 0)                                               \
-        ? 0                                                           \
-        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
-
-/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
-   instead. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
-                                 Py_ssize_t length, Py_UCS4 maxchar);
-
-/* Prepare the buffer to have at least the kind KIND.
-   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
-   support characters in range U+000-U+FFFF.
-
-   Return 0 on success, raise an exception and return -1 on error. */
-#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
-    ((KIND) <= (WRITER)->kind                                         \
-     ? 0                                                              \
-     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
-
-/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
-   macro instead. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
-                                     int kind);
-
-/* Append a Unicode character.
-   Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
-    Py_UCS4 ch
-    );
-
-/* Append a Unicode string.
-   Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
-    PyObject *str               /* Unicode string */
-    );
-
-/* Append a substring of a Unicode string.
-   Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
-    PyObject *str,              /* Unicode string */
-    Py_ssize_t start,
-    Py_ssize_t end
-    );
-
-/* Append an ASCII-encoded byte string.
-   Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
-    const char *str,           /* ASCII-encoded byte string */
-    Py_ssize_t len             /* number of bytes, or -1 if unknown */
-    );
-
-/* Append a latin1-encoded byte string.
-   Return 0 on success, raise an exception and return -1 on error. */
-PyAPI_FUNC(int)
-_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
-    const char *str,           /* latin1-encoded byte string */
-    Py_ssize_t len             /* length in bytes */
-    );
-
-/* Get the value of the writer as a Unicode string. Clear the
-   buffer of the writer. Raise an exception and return NULL
-   on error. */
-PyAPI_FUNC(PyObject *)
-_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
-
-/* Deallocate memory of a writer (clear its internal buffer). */
-PyAPI_FUNC(void)
-_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
-
-
-/* Format the object based on the format_spec, as defined in PEP 3101
-   (Advanced String Formatting). */
-PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
-    _PyUnicodeWriter *writer,
-    PyObject *obj,
-    PyObject *format_spec,
-    Py_ssize_t start,
-    Py_ssize_t end);
-
 /* --- Manage the default encoding ---------------------------------------- */
 
 /* Returns a pointer to the default encoding (UTF-8) of the
@@ -774,20 +649,6 @@ PyAPI_FUNC(PyObject *) _PyUnicode_XStrip(
     PyObject *sepobj
     );
 
-/* Using explicit passed-in values, insert the thousands grouping
-   into the string pointed to by buffer.  For the argument descriptions,
-   see Objects/stringlib/localeutil.h */
-PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
-    _PyUnicodeWriter *writer,
-    Py_ssize_t n_buffer,
-    PyObject *digits,
-    Py_ssize_t d_pos,
-    Py_ssize_t n_digits,
-    Py_ssize_t min_width,
-    const char *grouping,
-    PyObject *thousands_sep,
-    Py_UCS4 *maxchar);
-
 /* === Characters Type APIs =============================================== */
 
 /* These should not be used directly. Use the Py_UNICODE_IS* and
diff --git a/Include/internal/pycore_complexobject.h b/Include/internal/pycore_complexobject.h
index fb344b7bb79bd..7843c0a008ebb 100644
--- a/Include/internal/pycore_complexobject.h
+++ b/Include/internal/pycore_complexobject.h
@@ -8,6 +8,8 @@ extern "C" {
 #  error "this header requires Py_BUILD_CORE define"
 #endif
 
+#include "pycore_unicodeobject.h" // _PyUnicodeWriter
+
 /* Operations on complex numbers from complexmodule.c */
 
 PyAPI_FUNC(Py_complex) _Py_c_sum(Py_complex, Py_complex);
diff --git a/Include/internal/pycore_floatobject.h b/Include/internal/pycore_floatobject.h
index 27c63bc87f3ee..6abba04033d28 100644
--- a/Include/internal/pycore_floatobject.h
+++ b/Include/internal/pycore_floatobject.h
@@ -9,6 +9,8 @@ extern "C" {
 #endif
 
 
+#include "pycore_unicodeobject.h" // _PyUnicodeWriter
+
 /* runtime lifecycle */
 
 extern void _PyFloat_InitState(PyInterpreterState *);
diff --git a/Include/internal/pycore_unicodeobject.h b/Include/internal/pycore_unicodeobject.h
index 1bb0f366e7816..a8c7f1957f360 100644
--- a/Include/internal/pycore_unicodeobject.h
+++ b/Include/internal/pycore_unicodeobject.h
@@ -14,7 +14,148 @@ extern "C" {
 void _PyUnicode_ExactDealloc(PyObject *op);
 Py_ssize_t _PyUnicode_InternedSize(void);
 
-/* runtime lifecycle */
+/* --- _PyUnicodeWriter API ----------------------------------------------- */
+
+typedef struct {
+    PyObject *buffer;
+    void *data;
+    int kind;
+    Py_UCS4 maxchar;
+    Py_ssize_t size;
+    Py_ssize_t pos;
+
+    /* minimum number of allocated characters (default: 0) */
+    Py_ssize_t min_length;
+
+    /* minimum character (default: 127, ASCII) */
+    Py_UCS4 min_char;
+
+    /* If non-zero, overallocate the buffer (default: 0). */
+    unsigned char overallocate;
+
+    /* If readonly is 1, buffer is a shared string (cannot be modified)
+       and size is set to 0. */
+    unsigned char readonly;
+} _PyUnicodeWriter ;
+
+/* Initialize a Unicode writer.
+ *
+ * By default, the minimum buffer size is 0 character and overallocation is
+ * disabled. Set min_length, min_char and overallocate attributes to control
+ * the allocation of the buffer. */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Init(_PyUnicodeWriter *writer);
+
+/* Prepare the buffer to write 'length' characters
+   with the specified maximum character.
+
+   Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_Prepare(WRITER, LENGTH, MAXCHAR)             \
+    (((MAXCHAR) <= (WRITER)->maxchar                                  \
+      && (LENGTH) <= (WRITER)->size - (WRITER)->pos)                  \
+     ? 0                                                              \
+     : (((LENGTH) == 0)                                               \
+        ? 0                                                           \
+        : _PyUnicodeWriter_PrepareInternal((WRITER), (LENGTH), (MAXCHAR))))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_Prepare() macro
+   instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareInternal(_PyUnicodeWriter *writer,
+                                 Py_ssize_t length, Py_UCS4 maxchar);
+
+/* Prepare the buffer to have at least the kind KIND.
+   For example, kind=PyUnicode_2BYTE_KIND ensures that the writer will
+   support characters in range U+000-U+FFFF.
+
+   Return 0 on success, raise an exception and return -1 on error. */
+#define _PyUnicodeWriter_PrepareKind(WRITER, KIND)                    \
+    ((KIND) <= (WRITER)->kind                                         \
+     ? 0                                                              \
+     : _PyUnicodeWriter_PrepareKindInternal((WRITER), (KIND)))
+
+/* Don't call this function directly, use the _PyUnicodeWriter_PrepareKind()
+   macro instead. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_PrepareKindInternal(_PyUnicodeWriter *writer,
+                                     int kind);
+
+/* Append a Unicode character.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteChar(_PyUnicodeWriter *writer,
+    Py_UCS4 ch
+    );
+
+/* Append a Unicode string.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteStr(_PyUnicodeWriter *writer,
+    PyObject *str               /* Unicode string */
+    );
+
+/* Append a substring of a Unicode string.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteSubstring(_PyUnicodeWriter *writer,
+    PyObject *str,              /* Unicode string */
+    Py_ssize_t start,
+    Py_ssize_t end
+    );
+
+/* Append an ASCII-encoded byte string.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteASCIIString(_PyUnicodeWriter *writer,
+    const char *str,           /* ASCII-encoded byte string */
+    Py_ssize_t len             /* number of bytes, or -1 if unknown */
+    );
+
+/* Append a latin1-encoded byte string.
+   Return 0 on success, raise an exception and return -1 on error. */
+PyAPI_FUNC(int)
+_PyUnicodeWriter_WriteLatin1String(_PyUnicodeWriter *writer,
+    const char *str,           /* latin1-encoded byte string */
+    Py_ssize_t len             /* length in bytes */
+    );
+
+/* Get the value of the writer as a Unicode string. Clear the
+   buffer of the writer. Raise an exception and return NULL
+   on error. */
+PyAPI_FUNC(PyObject *)
+_PyUnicodeWriter_Finish(_PyUnicodeWriter *writer);
+
+/* Deallocate memory of a writer (clear its internal buffer). */
+PyAPI_FUNC(void)
+_PyUnicodeWriter_Dealloc(_PyUnicodeWriter *writer);
+
+
+/* Format the object based on the format_spec, as defined in PEP 3101
+   (Advanced String Formatting). */
+PyAPI_FUNC(int) _PyUnicode_FormatAdvancedWriter(
+    _PyUnicodeWriter *writer,
+    PyObject *obj,
+    PyObject *format_spec,
+    Py_ssize_t start,
+    Py_ssize_t end);
+
+/* --- Methods & Slots ---------------------------------------------------- */
+
+/* Using explicit passed-in values, insert the thousands grouping
+   into the string pointed to by buffer.  For the argument descriptions,
+   see Objects/stringlib/localeutil.h */
+PyAPI_FUNC(Py_ssize_t) _PyUnicode_InsertThousandsGrouping(
+    _PyUnicodeWriter *writer,
+    Py_ssize_t n_buffer,
+    PyObject *digits,
+    Py_ssize_t d_pos,
+    Py_ssize_t n_digits,
+    Py_ssize_t min_width,
+    const char *grouping,
+    PyObject *thousands_sep,
+    Py_UCS4 *maxchar);
+
+/* --- Runtime lifecycle -------------------------------------------------- */
 
 extern void _PyUnicode_InitState(PyInterpreterState *);
 extern PyStatus _PyUnicode_InitGlobalObjects(PyInterpreterState *);
@@ -24,7 +165,7 @@ extern void _PyUnicode_FiniTypes(PyInterpreterState *);
 
 extern PyTypeObject _PyUnicodeASCIIIter_Type;
 
-/* other API */
+/* --- Other API ---------------------------------------------------------- */
 
 struct _Py_unicode_runtime_ids {
     PyThread_type_lock lock;
diff --git a/Modules/cjkcodecs/cjkcodecs.h b/Modules/cjkcodecs/cjkcodecs.h
index 48cdcfb3ad308..97290aac3ba43 100644
--- a/Modules/cjkcodecs/cjkcodecs.h
+++ b/Modules/cjkcodecs/cjkcodecs.h
@@ -7,6 +7,10 @@
 #ifndef _CJKCODECS_H_
 #define _CJKCODECS_H_
 
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
 #include "Python.h"
 #include "multibytecodec.h"
 
diff --git a/Modules/cjkcodecs/multibytecodec.c b/Modules/cjkcodecs/multibytecodec.c
index cf437d09f1fe8..3febd1a832f9c 100644
--- a/Modules/cjkcodecs/multibytecodec.c
+++ b/Modules/cjkcodecs/multibytecodec.c
@@ -4,6 +4,10 @@
  * Written by Hye-Shik Chang <perky at FreeBSD.org>
  */
 
+#ifndef Py_BUILD_CORE_BUILTIN
+#  define Py_BUILD_CORE_MODULE 1
+#endif
+
 #include "Python.h"
 #include "structmember.h"         // PyMemberDef
 #include "multibytecodec.h"
diff --git a/Modules/cjkcodecs/multibytecodec.h b/Modules/cjkcodecs/multibytecodec.h
index f59362205d26f..5b85e2e3de316 100644
--- a/Modules/cjkcodecs/multibytecodec.h
+++ b/Modules/cjkcodecs/multibytecodec.h
@@ -10,6 +10,8 @@
 extern "C" {
 #endif
 
+#include "pycore_unicodeobject.h" // _PyUnicodeWriter
+
 #ifdef uint16_t
 typedef uint16_t ucs2_t, DBCHAR;
 #else
diff --git a/Tools/c-analyzer/c_parser/preprocessor/gcc.py b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
index 0929f7111eac9..415a2ba63dfe6 100644
--- a/Tools/c-analyzer/c_parser/preprocessor/gcc.py
+++ b/Tools/c-analyzer/c_parser/preprocessor/gcc.py
@@ -3,6 +3,14 @@
 
 from . import common as _common
 
+# Modules/socketmodule.h uses pycore_time.h which needs the Py_BUILD_CORE
+# macro. Usually it's defined by the C file which includes it.
+# Other header files have a similar issue.
+NEED_BUILD_CORE = {
+    'cjkcodecs.h',
+    'multibytecodec.h',
+    'socketmodule.h',
+}
 
 TOOL = 'gcc'
 
@@ -62,9 +70,7 @@ def preprocess(filename,
     filename = _normpath(filename, cwd)
 
     postargs = POST_ARGS
-    if os.path.basename(filename) == 'socketmodule.h':
-        # Modules/socketmodule.h uses pycore_time.h which needs Py_BUILD_CORE.
-        # Usually it's defined by the C file which includes it.
+    if os.path.basename(filename) in NEED_BUILD_CORE:
         postargs += ('-DPy_BUILD_CORE=1',)
 
     text = _common.preprocess(



More information about the Python-checkins mailing list