[Python-checkins] cpython: Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`, rename

Fri Aug 1 12:34:48 CEST 2014

http://hg.python.org/cpython/rev/93a798c7f270
changeset:   91953:93a798c7f270
user:        Victor Stinner <victor.stinner at gmail.com>
date:        Fri Aug 01 12:28:48 2014 +0200
summary:
  Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`, rename
``_Py_wchar2char()`` to :c:func:`Py_EncodeLocale`, and document these
functions.

files:
  Doc/c-api/sys.rst       |  54 +++++++++++++++++++++++
  Doc/c-api/unicode.rst   |  35 +++++++++------
  Doc/library/codecs.rst  |   1 +
  Doc/library/os.rst      |   7 +-
  Include/fileutils.h     |   4 +-
  Misc/NEWS               |   4 +
  Misc/coverity_model.c   |   2 +-
  Modules/getpath.c       |  16 +++---
  Modules/main.c          |   4 +-
  Objects/unicodeobject.c |   8 +-
  Programs/python.c       |   2 +-
  Python/fileutils.c      |  67 +++++++++++++++-------------
  Python/frozenmain.c     |   2 +-
  13 files changed, 138 insertions(+), 68 deletions(-)

diff --git a/Doc/c-api/sys.rst b/Doc/c-api/sys.rst
--- a/Doc/c-api/sys.rst
+++ b/Doc/c-api/sys.rst
@@ -47,6 +47,60 @@
    not call those functions directly!  :c:type:`PyOS_sighandler_t` is a typedef
    alias for :c:type:`void (\*)(int)`.
 
+.. c:function:: wchar_t* Py_DecodeLocale(const char* arg, size_t *size)
+
+   Decode a byte string from the locale encoding with the :ref:`surrogateescape
+   error handler <surrogateescape>`: undecodable bytes are decoded as
+   characters in range U+DC80..U+DCFF. If a byte sequence can be decoded as a
+   surrogate character, escape the bytes using the surrogateescape error
+   handler instead of decoding them.
+
+   Return a pointer to a newly allocated wide character string, use
+   :c:func:`PyMem_RawFree` to free the memory. If *size* is not ``NULL``, write
+   the number of wide characters excluding the null character into ``*size``.
+
+   Return ``NULL`` on decoding error or memory allocation error. If *size* is
+   not ``NULL``, ``*size`` is set to ``(size_t)-1`` on memory error or set to
+   ``(size_t)-2`` on decoding error.
+
+   Decoding errors should never happen, unless there is a bug in the C
+   library.
+
+   Use the :c:func:`Py_EncodeLocale` function to encode the character string
+   back to a byte string.
+
+   .. seealso::
+
+      The :c:func:`PyUnicode_DecodeFSDefaultAndSize` and
+      :c:func:`PyUnicode_DecodeLocaleAndSize` functions.
+
+   .. versionadded:: 3.5
+
+
+.. c:function:: char* Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
+
+   Encode a wide character string to the locale encoding with the
+   :ref:`surrogateescape error handler <surrogateescape>`: surrogate characters
+   in the range U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
+
+   Return a pointer to a newly allocated byte string, use :c:func:`PyMem_Free`
+   to free the memory. Return ``NULL`` on encoding error or memory allocation
+   error.
+
+   If *error_pos* is not ``NULL``, ``*error_pos`` is set to the index of the
+   invalid character on encoding error, or set to ``(size_t)-1`` otherwise.
+
+   Use the :c:func:`Py_DecodeLocale` function to decode the byte string back
+   to a wide character string.
+
+   .. seealso::
+
+      The :c:func:`PyUnicode_EncodeFSDefault` and
+      :c:func:`PyUnicode_EncodeLocale` functions.
+
+   .. versionadded:: 3.5
+
+
 .. _systemfunctions:
 
 System Functions
diff --git a/Doc/c-api/unicode.rst b/Doc/c-api/unicode.rst
--- a/Doc/c-api/unicode.rst
+++ b/Doc/c-api/unicode.rst
@@ -758,11 +758,13 @@
    *errors* is ``NULL``.  *str* must end with a null character but
    cannot contain embedded null characters.
 
+   Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` to decode a string from
+   :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
+   Python startup).
+
    .. seealso::
 
-      Use :c:func:`PyUnicode_DecodeFSDefaultAndSize` to decode a string from
-      :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
-      Python startup).
+      The :c:func:`Py_DecodeLocale` function.
 
    .. versionadded:: 3.3
 
@@ -783,11 +785,13 @@
    *errors* is ``NULL``. Return a :class:`bytes` object. *str* cannot
    contain embedded null characters.
 
+   Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
+   :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
+   Python startup).
+
    .. seealso::
 
-      Use :c:func:`PyUnicode_EncodeFSDefault` to encode a string to
-      :c:data:`Py_FileSystemDefaultEncoding` (the locale encoding read at
-      Python startup).
+      The :c:func:`Py_EncodeLocale` function.
 
    .. versionadded:: 3.3
 
@@ -832,12 +836,14 @@
    If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
    locale encoding.
 
+   :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
+   locale encoding and cannot be modified later. If you need to decode a string
+   from the current locale encoding, use
+   :c:func:`PyUnicode_DecodeLocaleAndSize`.
+
    .. seealso::
 
-      :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
-      locale encoding and cannot be modified later. If you need to decode a
-      string from the current locale encoding, use
-      :c:func:`PyUnicode_DecodeLocaleAndSize`.
+      The :c:func:`Py_DecodeLocale` function.
 
    .. versionchanged:: 3.2
       Use ``"strict"`` error handler on Windows.
@@ -867,12 +873,13 @@
    If :c:data:`Py_FileSystemDefaultEncoding` is not set, fall back to the
    locale encoding.
 
+   :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
+   locale encoding and cannot be modified later. If you need to encode a string
+   to the current locale encoding, use :c:func:`PyUnicode_EncodeLocale`.
+
    .. seealso::
 
-      :c:data:`Py_FileSystemDefaultEncoding` is initialized at startup from the
-      locale encoding and cannot be modified later. If you need to encode a
-      string to the current locale encoding, use
-      :c:func:`PyUnicode_EncodeLocale`.
+      The :c:func:`Py_EncodeLocale` function.
 
    .. versionadded:: 3.2
 
diff --git a/Doc/library/codecs.rst b/Doc/library/codecs.rst
--- a/Doc/library/codecs.rst
+++ b/Doc/library/codecs.rst
@@ -318,6 +318,7 @@
    encodings.
 
 
+.. _surrogateescape:
 .. _codec-base-classes:
 
 Codec Base Classes
diff --git a/Doc/library/os.rst b/Doc/library/os.rst
--- a/Doc/library/os.rst
+++ b/Doc/library/os.rst
@@ -78,9 +78,10 @@
 
 .. versionchanged:: 3.1
    On some systems, conversion using the file system encoding may fail. In this
-   case, Python uses the ``surrogateescape`` encoding error handler, which means
-   that undecodable bytes are replaced by a Unicode character U+DCxx on
-   decoding, and these are again translated to the original byte on encoding.
+   case, Python uses the :ref:`surrogateescape encoding error handler
+   <surrogateescape>`, which means that undecodable bytes are replaced by a
+   Unicode character U+DCxx on decoding, and these are again translated to the
+   original byte on encoding.
 
 
 The file system encoding must guarantee to successfully decode all bytes
diff --git a/Include/fileutils.h b/Include/fileutils.h
--- a/Include/fileutils.h
+++ b/Include/fileutils.h
@@ -7,11 +7,11 @@
 
 PyAPI_FUNC(PyObject *) _Py_device_encoding(int);
 
-PyAPI_FUNC(wchar_t *) _Py_char2wchar(
+PyAPI_FUNC(wchar_t *) Py_DecodeLocale(
     const char *arg,
     size_t *size);
 
-PyAPI_FUNC(char*) _Py_wchar2char(
+PyAPI_FUNC(char*) Py_EncodeLocale(
     const wchar_t *text,
     size_t *error_pos);
 
diff --git a/Misc/NEWS b/Misc/NEWS
--- a/Misc/NEWS
+++ b/Misc/NEWS
@@ -10,6 +10,10 @@
 Core and Builtins
 -----------------
 
+- Issue #18395: Rename ``_Py_char2wchar()`` to :c:func:`Py_DecodeLocale`,
+  rename ``_Py_wchar2char()`` to :c:func:`Py_EncodeLocale`, and document
+  these functions.
+
 - Issue #20179: Apply Argument Clinic to bytes and bytearray.
   Patch by Tal Einat.
 
diff --git a/Misc/coverity_model.c b/Misc/coverity_model.c
--- a/Misc/coverity_model.c
+++ b/Misc/coverity_model.c
@@ -85,7 +85,7 @@
 }
 
 /* Python/fileutils.c */
-wchar_t *_Py_char2wchar(const char* arg, size_t *size)
+wchar_t *Py_DecodeLocale(const char* arg, size_t *size)
 {
    wchar_t *w;
     __coverity_tainted_data_sink__(arg);
diff --git a/Modules/getpath.c b/Modules/getpath.c
--- a/Modules/getpath.c
+++ b/Modules/getpath.c
@@ -336,7 +336,7 @@
     joinpath(prefix, L"Modules/Setup");
     if (isfile(prefix)) {
         /* Check VPATH to see if argv0_path is in the build directory. */
-        vpath = _Py_char2wchar(VPATH, NULL);
+        vpath = Py_DecodeLocale(VPATH, NULL);
         if (vpath != NULL) {
             wcsncpy(prefix, argv0_path, MAXPATHLEN);
             prefix[MAXPATHLEN] = L'\0';
@@ -491,10 +491,10 @@
     wchar_t *_pythonpath, *_prefix, *_exec_prefix;
     wchar_t *lib_python;
 
-    _pythonpath = _Py_char2wchar(PYTHONPATH, NULL);
-    _prefix = _Py_char2wchar(PREFIX, NULL);
-    _exec_prefix = _Py_char2wchar(EXEC_PREFIX, NULL);
-    lib_python = _Py_char2wchar("lib/python" VERSION, NULL);
+    _pythonpath = Py_DecodeLocale(PYTHONPATH, NULL);
+    _prefix = Py_DecodeLocale(PREFIX, NULL);
+    _exec_prefix = Py_DecodeLocale(EXEC_PREFIX, NULL);
+    lib_python = Py_DecodeLocale("lib/python" VERSION, NULL);
 
     if (!_pythonpath || !_prefix || !_exec_prefix || !lib_python) {
         Py_FatalError(
@@ -503,7 +503,7 @@
     }
 
     if (_path) {
-        path_buffer = _Py_char2wchar(_path, NULL);
+        path_buffer = Py_DecodeLocale(_path, NULL);
         path = path_buffer;
     }
 
@@ -584,7 +584,7 @@
         ** be running the interpreter in the build directory, so we use the
         ** build-directory-specific logic to find Lib and such.
         */
-        wchar_t* wbuf = _Py_char2wchar(modPath, NULL);
+        wchar_t* wbuf = Py_DecodeLocale(modPath, NULL);
         if (wbuf == NULL) {
             Py_FatalError("Cannot decode framework location");
         }
@@ -709,7 +709,7 @@
 
     if (_rtpypath && _rtpypath[0] != '\0') {
         size_t rtpypath_len;
-        rtpypath = _Py_char2wchar(_rtpypath, &rtpypath_len);
+        rtpypath = Py_DecodeLocale(_rtpypath, &rtpypath_len);
         if (rtpypath != NULL)
             bufsz += rtpypath_len + 1;
     }
diff --git a/Modules/main.c b/Modules/main.c
--- a/Modules/main.c
+++ b/Modules/main.c
@@ -647,7 +647,7 @@
             /* Used by Mac/Tools/pythonw.c to forward
              * the argv0 of the stub executable
              */
-            wchar_t* wbuf = _Py_char2wchar(pyvenv_launcher, NULL);
+            wchar_t* wbuf = Py_DecodeLocale(pyvenv_launcher, NULL);
 
             if (wbuf == NULL) {
                 Py_FatalError("Cannot decode __PYVENV_LAUNCHER__");
@@ -730,7 +730,7 @@
                 char *cfilename_buffer;
                 const char *cfilename;
                 int err = errno;
-                cfilename_buffer = _Py_wchar2char(filename, NULL);
+                cfilename_buffer = Py_EncodeLocale(filename, NULL);
                 if (cfilename_buffer != NULL)
                     cfilename = cfilename_buffer;
                 else
diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3255,7 +3255,7 @@
         /* "surrogateescape" error handler */
         char *str;
 
-        str = _Py_wchar2char(wstr, &error_pos);
+        str = Py_EncodeLocale(wstr, &error_pos);
         if (str == NULL) {
             if (error_pos == (size_t)-1) {
                 PyErr_NoMemory();
@@ -3308,7 +3308,7 @@
 
     if (errmsg != NULL) {
         size_t errlen;
-        wstr = _Py_char2wchar(errmsg, &errlen);
+        wstr = Py_DecodeLocale(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
             PyMem_RawFree(wstr);
@@ -3526,7 +3526,7 @@
 
     if (surrogateescape) {
         /* "surrogateescape" error handler */
-        wstr = _Py_char2wchar(str, &wlen);
+        wstr = Py_DecodeLocale(str, &wlen);
         if (wstr == NULL) {
             if (wlen == (size_t)-1)
                 PyErr_NoMemory();
@@ -3581,7 +3581,7 @@
     error_pos = mbstowcs_errorpos(str, len);
     if (errmsg != NULL) {
         size_t errlen;
-        wstr = _Py_char2wchar(errmsg, &errlen);
+        wstr = Py_DecodeLocale(errmsg, &errlen);
         if (wstr != NULL) {
             reason = PyUnicode_FromWideChar(wstr, errlen);
             PyMem_RawFree(wstr);
diff --git a/Programs/python.c b/Programs/python.c
--- a/Programs/python.c
+++ b/Programs/python.c
@@ -52,7 +52,7 @@
 
     setlocale(LC_ALL, "");
     for (i = 0; i < argc; i++) {
-        argv_copy[i] = _Py_char2wchar(argv[i], NULL);
+        argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
         if (!argv_copy[i]) {
             PyMem_RawFree(oldloc);
             fprintf(stderr, "Fatal Python error: "
diff --git a/Python/fileutils.c b/Python/fileutils.c
--- a/Python/fileutils.c
+++ b/Python/fileutils.c
@@ -82,11 +82,11 @@
 
    Values of force_ascii:
 
-       1: the workaround is used: _Py_wchar2char() uses
-          encode_ascii_surrogateescape() and _Py_char2wchar() uses
+       1: the workaround is used: Py_EncodeLocale() uses
+          encode_ascii_surrogateescape() and Py_DecodeLocale() uses
           decode_ascii_surrogateescape()
-       0: the workaround is not used: _Py_wchar2char() uses wcstombs() and
-          _Py_char2wchar() uses mbstowcs()
+       0: the workaround is not used: Py_EncodeLocale() uses wcstombs() and
+          Py_DecodeLocale() uses mbstowcs()
       -1: unknown, need to call check_force_ascii() to get the value
 */
 static int force_ascii = -1;
@@ -241,24 +241,26 @@
 
 
 /* Decode a byte string from the locale encoding with the
-   surrogateescape error handler (undecodable bytes are decoded as characters
-   in range U+DC80..U+DCFF). If a byte sequence can be decoded as a surrogate
+   surrogateescape error handler: undecodable bytes are decoded as characters
+   in range U+DC80..U+DCFF. If a byte sequence can be decoded as a surrogate
    character, escape the bytes using the surrogateescape error handler instead
    of decoding them.
 
-   Use _Py_wchar2char() to encode the character string back to a byte string.
+   Return a pointer to a newly allocated wide character string, use
+   PyMem_RawFree() to free the memory. If size is not NULL, write the number of
+   wide characters excluding the null character into *size.
 
-   Return a pointer to a newly allocated wide character string (use
-   PyMem_RawFree() to free the memory) and write the number of written wide
-   characters excluding the null character into *size if size is not NULL, or
-   NULL on error (decoding or memory allocation error). If size is not NULL,
-   *size is set to (size_t)-1 on memory error and (size_t)-2 on decoding
-   error.
+   Return NULL on decoding error or memory allocation error. If size is not
+   NULL, *size is set to (size_t)-1 on memory error or set to (size_t)-2 on
+   decoding error.
 
-   Conversion errors should never happen, unless there is a bug in the C
-   library. */
+   Decoding errors should never happen, unless there is a bug in the C
+   library.
+
+   Use the Py_EncodeLocale() function to encode the character string back to a
+   byte string. */
 wchar_t*
-_Py_char2wchar(const char* arg, size_t *size)
+Py_DecodeLocale(const char* arg, size_t *size)
 {
 #ifdef __APPLE__
     wchar_t *wstr;
@@ -389,19 +391,20 @@
 #endif   /* __APPLE__ */
 }
 
-/* Encode a (wide) character string to the locale encoding with the
-   surrogateescape error handler (characters in range U+DC80..U+DCFF are
-   converted to bytes 0x80..0xFF).
+/* Encode a wide character string to the locale encoding with the
+   surrogateescape error handler: surrogate characters in the range
+   U+DC80..U+DCFF are converted to bytes 0x80..0xFF.
 
-   This function is the reverse of _Py_char2wchar().
+   Return a pointer to a newly allocated byte string, use PyMem_Free() to free
+   the memory. Return NULL on encoding or memory allocation error.
 
-   Return a pointer to a newly allocated byte string (use PyMem_Free() to free
-   the memory), or NULL on encoding or memory allocation error.
+   If error_pos is not NULL, *error_pos is set to the index of the invalid
+   character on encoding error, or set to (size_t)-1 otherwise.
 
-   If error_pos is not NULL: *error_pos is the index of the invalid character
-   on encoding error, or (size_t)-1 otherwise. */
+   Use the Py_DecodeLocale() function to decode the byte string back to a wide
+   character string. */
 char*
-_Py_wchar2char(const wchar_t *text, size_t *error_pos)
+Py_EncodeLocale(const wchar_t *text, size_t *error_pos)
 {
 #ifdef __APPLE__
     Py_ssize_t len;
@@ -520,7 +523,7 @@
 {
     int err;
     char *fname;
-    fname = _Py_wchar2char(path, NULL);
+    fname = Py_EncodeLocale(path, NULL);
     if (fname == NULL) {
         errno = EINVAL;
         return -1;
@@ -784,7 +787,7 @@
         errno = EINVAL;
         return NULL;
     }
-    cpath = _Py_wchar2char(path, NULL);
+    cpath = Py_EncodeLocale(path, NULL);
     if (cpath == NULL)
         return NULL;
     f = fopen(cpath, cmode);
@@ -875,7 +878,7 @@
     int res;
     size_t r1;
 
-    cpath = _Py_wchar2char(path, NULL);
+    cpath = Py_EncodeLocale(path, NULL);
     if (cpath == NULL) {
         errno = EINVAL;
         return -1;
@@ -889,7 +892,7 @@
         return -1;
     }
     cbuf[res] = '\0'; /* buf will be null terminated */
-    wbuf = _Py_char2wchar(cbuf, &r1);
+    wbuf = Py_DecodeLocale(cbuf, &r1);
     if (wbuf == NULL) {
         errno = EINVAL;
         return -1;
@@ -920,7 +923,7 @@
     wchar_t *wresolved_path;
     char *res;
     size_t r;
-    cpath = _Py_wchar2char(path, NULL);
+    cpath = Py_EncodeLocale(path, NULL);
     if (cpath == NULL) {
         errno = EINVAL;
         return NULL;
@@ -930,7 +933,7 @@
     if (res == NULL)
         return NULL;
 
-    wresolved_path = _Py_char2wchar(cresolved_path, &r);
+    wresolved_path = Py_DecodeLocale(cresolved_path, &r);
     if (wresolved_path == NULL) {
         errno = EINVAL;
         return NULL;
@@ -963,7 +966,7 @@
 
     if (getcwd(fname, Py_ARRAY_LENGTH(fname)) == NULL)
         return NULL;
-    wname = _Py_char2wchar(fname, &len);
+    wname = Py_DecodeLocale(fname, &len);
     if (wname == NULL)
         return NULL;
     if (size <= len) {
diff --git a/Python/frozenmain.c b/Python/frozenmain.c
--- a/Python/frozenmain.c
+++ b/Python/frozenmain.c
@@ -52,7 +52,7 @@
 
     setlocale(LC_ALL, "");
     for (i = 0; i < argc; i++) {
-        argv_copy[i] = _Py_char2wchar(argv[i], NULL);
+        argv_copy[i] = Py_DecodeLocale(argv[i], NULL);
         argv_copy2[i] = argv_copy[i];
         if (!argv_copy[i]) {
             fprintf(stderr, "Unable to decode the command line argument #%i\n",

-- 
Repository URL: http://hg.python.org/cpython