[Python-checkins] CVS: python/dist/src/Doc/api api.tex,1.61,1.62

Fred Drake python-dev@python.org
Thu, 6 Apr 2000 10:10:31 -0400


Update of /projects/cvsroot/python/dist/src/Doc/api
In directory seahag.cnri.reston.va.us:/home/fdrake/projects/python/Doc/api

Modified Files:
	api.tex 
Log Message:

Marc-Andre Lemburg <mal@lemburg.com>:
API documentation for Unicode support from C.


Index: api.tex
===================================================================
RCS file: /projects/cvsroot/python/dist/src/Doc/api/api.tex,v
retrieving revision 1.61
retrieving revision 1.62
diff -C2 -r1.61 -r1.62
*** api.tex	2000/04/03 15:42:13	1.61
--- api.tex	2000/04/06 14:10:29	1.62
***************
*** 1900,1903 ****
--- 1900,2615 ----
  
  
+ \subsection{Unicode Objects \label{unicodeObjects}}
+ \sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
+ 
+ %--- Unicode Type -------------------------------------------------------
+ 
+ These are the basic Unicode object types used for the Unicode
+ implementation in Python:
+ 
+ \begin{ctypedesc}{Py_UNICODE}
+ This type represents a 16-bit unsigned storage type which is used by
+ Python internally as the basis for holding Unicode ordinals. On platforms
+ where \ctype{wchar_t} is available and is also 16 bits wide,
+ \ctype{Py_UNICODE} is a typedef alias for \ctype{wchar_t} to enhance
+ native platform compatibility. On all other platforms,
+ \ctype{Py_UNICODE} is a typedef alias for \ctype{unsigned short}.
+ \end{ctypedesc}
+ 
+ \begin{ctypedesc}{PyUnicodeObject}
+ This subtype of \ctype{PyObject} represents a Python Unicode object.
+ \end{ctypedesc}
+ 
+ \begin{cvardesc}{PyTypeObject}{PyUnicode_Type}
+ This instance of \ctype{PyTypeObject} represents the Python Unicode type.
+ \end{cvardesc}
+ 
+ %--- These are really C macros... is there a macrodesc TeX macro ?
+ 
+ The following APIs are really C macros and can be used to do fast
+ checks and to access internal read-only data of Unicode objects:
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_Check}{PyObject *o}
+ Returns true if the object \var{o} is a Unicode object.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_GET_SIZE}{PyObject *o}
+ Returns the size of the object.  o has to be a
+ PyUnicodeObject (not checked).
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_GET_DATA_SIZE}{PyObject *o}
+ Returns the size of the object's internal buffer in bytes. o has to be
+ a PyUnicodeObject (not checked).
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{Py_UNICODE*}{PyUnicode_AS_UNICODE}{PyObject *o}
+ Returns a pointer to the internal Py_UNICODE buffer of the object. o
+ has to be a PyUnicodeObject (not checked).
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{const char*}{PyUnicode_AS_DATA}{PyObject *o}
+ Returns a (const char *) pointer to the internal buffer of the object.
+ o has to be a PyUnicodeObject (not checked).
+ \end{cfuncdesc}
+ 
+ % --- Unicode character properties ---------------------------------------
+ 
+ Unicode provides many different character properties. The most often
+ needed ones are available through these macros which are mapped to C
+ functions depending on the Python configuration.
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISSPACE}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a whitespace character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISLOWER}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a lowercase character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISUPPER}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is an uppercase character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISTITLE}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a titlecase character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISLINEBREAK}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a linebreak character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISDECIMAL}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a decimal character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISDIGIT}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a digit character.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_ISNUMERIC}{Py_UNICODE ch}
+ Returns 1/0 depending on whether \var{ch} is a numeric character.
+ \end{cfuncdesc}
+ 
+ These APIs can be used for fast direct character conversions:
+ 
+ \begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOLOWER}{Py_UNICODE ch}
+ Returns the character \var{ch} converted to lower case.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOUPPER}{Py_UNICODE ch}
+ Returns the character \var{ch} converted to upper case.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{Py_UNICODE}{Py_UNICODE_TOTITLE}{Py_UNICODE ch}
+ Returns the character \var{ch} converted to title case.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_TODECIMAL}{Py_UNICODE ch}
+ Returns the character \var{ch} converted to a decimal positive integer.
+ Returns -1 in case this is not possible. Does not raise exceptions.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{Py_UNICODE_TODIGIT}{Py_UNICODE ch}
+ Returns the character \var{ch} converted to a single digit integer.
+ Returns -1 in case this is not possible. Does not raise exceptions.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{double}{Py_UNICODE_TONUMERIC}{Py_UNICODE ch}
+ Returns the character \var{ch} converted to a (positive) double.
+ Returns -1.0 in case this is not possible. Does not raise exceptions.
+ \end{cfuncdesc}
+ 
+ % --- Plain Py_UNICODE ---------------------------------------------------
+ 
+ To create Unicode objects and access their basic sequence properties,
+ use these APIs:
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_FromUnicode}{const Py_UNICODE *u,
+                                                     int size} 
+ 
+ Create a Unicode Object from the Py_UNICODE buffer \var{u} of the
+ given size. \var{u} may be \NULL{} which causes the contents to be
+ undefined. It is the user's responsibility to fill in the needed data.
+ The buffer is copied into the new object.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{Py_UNICODE *}{PyUnicode_AsUnicode}{PyObject *unicode}
+ Return a read-only pointer to the Unicode object's internal
+ \ctype{Py_UNICODE} buffer.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_GetSize}{PyObject *unicode}
+ Return the length of the Unicode object.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_FromObject}{PyObject *obj}
+ 
+ Coerce \var{obj} to a Unicode object and return a reference with
+ incremented refcount.
+ 
+ Coercion is done in the following way:
+ \begin{enumerate}
+ \item  Unicode objects are passed back as-is with incremented
+       refcount.
+ 
+ \item String and other char buffer compatible objects are decoded
+       under the assumption that they contain UTF-8 data. Decoding
+       is done in ``strict'' mode.
+ 
+ \item All other objects raise an exception.
+ \end{enumerate}
+ The API returns \NULL{} in case of an error. The caller is responsible
+ for decref'ing the returned object.
+ \end{cfuncdesc}
+ 
+ % --- wchar_t support for platforms which support it ---------------------
+ 
+ If the platform supports \ctype{wchar_t} and provides a header file
+ wchar.h, Python can interface directly to this type using the
+ following functions. Support is optimized if Python's own
+ \ctype{Py_UNICODE} type is identical to the system's \ctype{wchar_t}.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_FromWideChar}{const wchar_t *w,
+                                                      int size}
+ Create a Unicode Object from the \ctype{wchar_t} buffer \var{w} of the
+ given size. Returns \NULL{} on failure.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_AsWideChar}{PyUnicodeObject *unicode,
+                                              wchar_t *w,
+                                              int size}
+ 
+ Copies the Unicode Object contents into the \ctype{wchar_t} buffer
+ \var{w}.  At most \var{size} \ctype{wchar_t} characters are copied.
+ Returns the number of \ctype{wchar_t} characters copied or -1 in case
+ of an error.
+ \end{cfuncdesc}
+ 
+ 
+ \subsubsection{Builtin Codecs \label{builtinCodecs}}
+ 
+ Python provides a set of builtin codecs which are written in C
+ for speed. All of these codecs are directly usable via the
+ following functions.
+ 
+ Many of the following APIs take two arguments encoding and
+ errors. These parameters encoding and errors have the same semantics
+ as the ones of the builtin unicode() Unicode object constructor.
+ 
+ Setting encoding to NULL causes the default encoding to be used which
+ is UTF-8.
+ 
+ Error handling is set by errors which may also be set to NULL meaning
+ to use the default handling defined for the codec. Default error
+ handling for all builtin codecs is ``strict'' (ValueErrors are raised).
+ 
+ The codecs all use a similar interface. For simplicity, only deviations
+ from the following generic ones are documented.
+ 
+ % --- Generic Codecs -----------------------------------------------------
+ 
+ These are the generic codec APIs:
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Decode}{const char *s,
+                                                int size,
+                                                const char *encoding,
+                                                const char *errors}
+ 
+ Create a Unicode object by decoding \var{size} bytes of the encoded
+ string \var{s}. \var{encoding} and \var{errors} have the same meaning
+ as the parameters of the same name in the unicode() builtin
+ function. The codec to be used is looked up using the Python codec
+ registry. Returns \NULL{} in case an exception was raised by the
+ codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Encode}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *encoding,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size and returns a
+ Python string object. \var{encoding} and \var{errors} have the same
+ meaning as the parameters of the same name in the Unicode .encode()
+ method. The codec to be used is looked up using the Python codec
+ registry. Returns \NULL{} in case an exception was raised by the
+ codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsEncodedString}{PyObject *unicode,
+                                                const char *encoding,
+                                                const char *errors}
+ 
+ Encodes a Unicode object and returns the result as a Python string
+ object. \var{encoding} and \var{errors} have the same meaning as the
+ parameters of the same name in the Unicode .encode() method. The codec
+ to be used is looked up using the Python codec registry. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- UTF-8 Codecs -------------------------------------------------------
+ 
+ These are the UTF-8 codec APIs:
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF8}{const char *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the UTF-8
+ encoded string \var{s}. Returns \NULL{} in case an exception was
+ raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF8}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using UTF-8
+ and returns a Python string object.  Returns \NULL{} in case an
+ exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsUTF8String}{PyObject *unicode}
+ 
+ Encodes a Unicode object using UTF-8 and returns the result as a Python
+ string object. Error handling is ``strict''. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- UTF-16 Codecs ------------------------------------------------------ */
+ 
+ These are the UTF-16 codec APIs:
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUTF16}{const char *s,
+                                                int size,
+                                                const char *errors,
+                                                int *byteorder}
+ 
+ Decodes \var{size} bytes from a UTF-16 encoded buffer string and
+ returns the corresponding Unicode object.
+ 
+ \var{errors} (if non-NULL) defines the error handling. It defaults
+ to ``strict''.
+ 
+ If \var{byteorder} is non-\NULL{}, the decoder starts decoding using
+ the given byte order:
+ 
+ \begin{verbatim}
+    *byteorder == -1: little endian
+    *byteorder == 0:  native order
+    *byteorder == 1:  big endian
+ \end{verbatim}
+ 
+ and then switches according to all byte order marks (BOM) it finds in
+ the input data. BOM marks are not copied into the resulting Unicode
+ string.  After completion, \var{*byteorder} is set to the current byte
+ order at the end of input data.
+ 
+ If \var{byteorder} is \NULL{}, the codec starts in native order mode.
+ 
+ Returns \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUTF16}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors,
+                                                int byteorder}
+ 
+ Returns a Python string object holding the UTF-16 encoded value of the
+ Unicode data in \var{s}.
+ 
+ If \var{byteorder} is not 0, output is written according to the
+ following byte order:
+ 
+ \begin{verbatim}
+    byteorder == -1: little endian
+    byteorder == 0:  native byte order (writes a BOM mark)
+    byteorder == 1:  big endian
+ \end{verbatim}
+ 
+ If byteorder is 0, the output string will always start with the
+ Unicode BOM mark (U+FEFF). In the other two modes, no BOM mark is
+ prepended.
+ 
+ Note that \ctype{Py_UNICODE} data is being interpreted as UTF-16
+ reduced to UCS-2. This trick makes it possible to add full UTF-16
+ capabilities at a later point without compromising the APIs.
+ 
+ Returns \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsUTF16String}{PyObject *unicode}
+ 
+ Returns a Python string using the UTF-16 encoding in native byte
+ order. The string always starts with a BOM mark. Error handling is
+ ``strict''. Returns \NULL{} in case an exception was raised by the
+ codec.
+ \end{cfuncdesc}
+ 
+ % --- Unicode-Escape Codecs ----------------------------------------------
+ 
+ These are the ``Unicode Escape'' codec APIs:
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeUnicodeEscape}{const char *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the Unicode-Escape
+ encoded string \var{s}. Returns \NULL{} in case an exception was
+ raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeUnicodeEscape}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using Unicode-Escape
+ and returns a Python string object.  Returns \NULL{} in case an
+ exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsUnicodeEscapeString}{PyObject *unicode}
+ 
+ Encodes a Unicode object using Unicode-Escape and returns the result
+ as a Python string object. Error handling is ``strict''. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- Raw-Unicode-Escape Codecs ------------------------------------------
+ 
+ These are the ``Raw Unicode Escape'' codec APIs:
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeRawUnicodeEscape}{const char *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the Raw-Unicode-Escape
+ encoded string \var{s}. Returns \NULL{} in case an exception was
+ raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeRawUnicodeEscape}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using Raw-Unicode-Escape
+ and returns a Python string object.  Returns \NULL{} in case an
+ exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsRawUnicodeEscapeString}{PyObject *unicode}
+ 
+ Encodes a Unicode object using Raw-Unicode-Escape and returns the result
+ as a Python string object. Error handling is ``strict''. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- Latin-1 Codecs ----------------------------------------------------- 
+ 
+ These are the Latin-1 codec APIs:
+ 
+ Latin-1 corresponds to the first 256 Unicode ordinals and only these
+ are accepted by the codecs during encoding.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeLatin1}{const char *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the Latin-1
+ encoded string \var{s}. Returns \NULL{} in case an exception was
+ raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeLatin1}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using Latin-1
+ and returns a Python string object.  Returns \NULL{} in case an
+ exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsLatin1String}{PyObject *unicode}
+ 
+ Encodes a Unicode object using Latin-1 and returns the result as a
+ Python string object. Error handling is ``strict''. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- ASCII Codecs ------------------------------------------------------- 
+ 
+ These are the ASCII codec APIs:
+ 
+ Only 7-bit ASCII data is accepted. All other codes generate errors.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeASCII}{const char *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the ASCII
+ encoded string \var{s}. Returns \NULL{} in case an exception was
+ raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeASCII}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using ASCII
+ and returns a Python string object.  Returns \NULL{} in case an
+ exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsASCIIString}{PyObject *unicode}
+ 
+ Encodes a Unicode object using ASCII and returns the result as a Python
+ string object. Error handling is ``strict''. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- Character Map Codecs ----------------------------------------------- 
+ 
+ These are the mapping codec APIs:
+ 
+ This codec is special in that it can be used to implement many
+ different codecs (and this is in fact what was done to obtain most of
+ the standard codecs included in the \module{encodings} package). The
+ codec uses mapping to encode and decode characters.
+ 
+ Decoding mappings must map single string characters to single Unicode
+ characters, integers (which are then interpreted as Unicode ordinals)
+ or None (meaning ``undefined mapping'' and causing an error).
+ 
+ Encoding mappings must map single Unicode characters to single string
+ characters, integers (which are then interpreted as Latin-1 ordinals)
+ or None (meaning ``undefined mapping'' and causing an error).
+ 
+ The mapping objects provided must only support the __getitem__ mapping
+ interface.
+ 
+ If a character lookup fails with a LookupError, the character is
+ copied as-is meaning that its ordinal value will be interpreted as
+ a Unicode or Latin-1 ordinal, respectively. Because of this, mappings only need
+ to contain those mappings which map characters to different code
+ points.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeCharmap}{const char *s,
+                                                int size,
+                                                PyObject *mapping,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the encoded
+ string \var{s} using the given \var{mapping} object.  Returns \NULL{}
+ in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeCharmap}{const Py_UNICODE *s,
+                                                int size,
+                                                PyObject *mapping,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using the
+ given \var{mapping} object and returns a Python string object.
+ Returns \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsCharmapString}{PyObject *unicode,
+                                                         PyObject *mapping}
+ 
+ Encodes a Unicode object using the given \var{mapping} object and
+ returns the result as a Python string object. Error handling is
+ ``strict''. Returns \NULL{} in case an exception was raised by the
+ codec.
+ \end{cfuncdesc}
+ 
+ The following codec API is special in that it maps Unicode to Unicode.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_TranslateCharmap}{const Py_UNICODE *s,
+                                                int size,
+                                                PyObject *table,
+                                                const char *errors}
+ 
+ Translates a \ctype{Py_UNICODE} buffer of the given length by applying
+ a character mapping \var{table} to it and returns the resulting
+ Unicode object.
+ 
+ The mapping \var{table} must map Unicode ordinal integers to Unicode
+ ordinal integers or None (causing deletion of the character).
+ 
+ Mapping tables must only provide the __getitem__ interface,
+ e.g. dictionaries or sequences. Unmapped character ordinals (ones
+ which cause a LookupError) are left untouched and are copied as-is.
+ 
+ Returns \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- MBCS codecs for Windows --------------------------------------------
+ 
+ These are the MBCS codec APIs. They are currently only available on
+ Windows and use the Win32 MBCS converters to implement the
+ conversions.
+ 
+ Note that MBCS (or DBCS) is a class of encodings, not just one.  The
+ target encoding is defined by the user settings on the machine running
+ the codec.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_DecodeMBCS}{const char *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Creates a Unicode object by decoding \var{size} bytes of the MBCS
+ encoded string \var{s}. Returns \NULL{} in case an exception was
+ raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_EncodeMBCS}{const Py_UNICODE *s,
+                                                int size,
+                                                const char *errors}
+ 
+ Encodes the \ctype{Py_UNICODE} buffer of the given size using MBCS
+ and returns a Python string object.  Returns \NULL{} in case an
+ exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_AsMBCSString}{PyObject *unicode}
+ 
+ Encodes a Unicode object using MBCS and returns the result as a Python
+ string object. Error handling is ``strict''. Returns
+ \NULL{} in case an exception was raised by the codec.
+ \end{cfuncdesc}
+ 
+ % --- Methods & Slots ----------------------------------------------------
+ 
+ \subsubsection{Methods and Slot Functions \label{unicodeMethodsAndSlots}}
+ 
+ The following APIs are capable of handling Unicode objects and strings
+ on input (we refer to them as strings in the descriptions) and return
+ Unicode objects or integers as appropriate.
+ 
+ They all return \NULL{} or -1 in case an exception occurs.
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Concat}{PyObject *left,
+                                                PyObject *right}
+ 
+ Concatenate two strings giving a new Unicode string.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Split}{PyObject *s,
+                                               PyObject *sep,
+                                               int maxsplit}
+ 
+ Split a string giving a list of Unicode strings.
+ 
+ If sep is NULL, splitting will be done at all whitespace
+ substrings. Otherwise, splits occur at the given separator.
+ 
+ At most maxsplit splits will be done. If negative, no limit is set.
+ 
+ Separators are not included in the resulting list.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Splitlines}{PyObject *s,
+                                                    int maxsplit}
+ 
+ Like \cfunction{PyUnicode_Split()}, but split at line breaks.
+ 
+ CRLF is considered to be one line break. Line breaks are not
+ included in the resulting list.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Translate}{PyObject *str,
+                                                   PyObject *table,
+                                                   const char *errors}
+ 
+ Translate a string by applying a character mapping table to it and
+ return the resulting Unicode object.
+ 
+ The mapping table must map Unicode ordinal integers to Unicode ordinal
+ integers or None (causing deletion of the character).
+ 
+ Mapping tables must only provide the __getitem__ interface,
+ e.g. dictionaries or sequences. Unmapped character ordinals (ones
+ which cause a LookupError) are left untouched and are copied as-is.
+ 
+ \var{errors} has the usual meaning for codecs. It may be \NULL{}
+ which indicates to use the default error handling.
+ 
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Join}{PyObject *separator,
+                                              PyObject *seq}
+ 
+ Join a sequence of strings using the given separator and return
+ the resulting Unicode string.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_Tailmatch}{PyObject *str,
+                                                   PyObject *substr,
+                                                   int start,
+                                                   int end,
+                                                   int direction}
+ 
+ Return 1 if \var{substr} matches \var{str}[\var{start}:\var{end}] at
+ the given tail end (\var{direction} == -1 means to do a prefix match,
+ \var{direction} == 1 a suffix match), 0 otherwise.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Find}{PyObject *str,
+                                                   PyObject *substr,
+                                                   int start,
+                                                   int end,
+                                                   int direction}
+ 
+ Return the first position of \var{substr} in
+ \var{str}[\var{start}:\var{end}] using the given \var{direction}
+ (\var{direction} == 1 means to do a forward search,
+ \var{direction} == -1 a backward search), 0 otherwise.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Count}{PyObject *str,
+                                                   PyObject *substr,
+                                                   int start,
+                                                   int end}
+ 
+ Count the number of occurrences of \var{substr} in
+ \var{str}[\var{start}:\var{end}].
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Replace}{PyObject *str,
+                                                 PyObject *substr,
+                                                 PyObject *replstr,
+                                                 int maxcount}
+ 
+ Replace at most \var{maxcount} occurrences of \var{substr} in
+ \var{str} with \var{replstr} and return the resulting Unicode object.
+ \var{maxcount} == -1 means: replace all occurrences.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_Compare}{PyObject *left,
+                                                 PyObject *right}
+ 
+ Compare two strings and return -1, 0, 1 for less than, equal, and
+ greater than, respectively.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{PyObject*}{PyUnicode_Format}{PyObject *format,
+                                               PyObject *args}
+ Returns a new string object from \var{format} and \var{args}.  Analogous
+ to \code{\var{format} \% \var{args}}.  The \var{args} argument must be
+ a tuple.
+ \end{cfuncdesc}
+ 
+ \begin{cfuncdesc}{int}{PyUnicode_Contains}{PyObject *container,
+                                            PyObject *element}
+ 
+ Checks whether \var{element} is contained in \var{container} and
+ returns 1/0 accordingly.
+ 
+ \var{element} has to coerce to a one-element Unicode string. -1 is
+ returned in case of an error.
+ \end{cfuncdesc}
+ 
+ 
  \subsection{Buffer Objects \label{bufferObjects}}
  \sectionauthor{Greg Stein}{gstein@lyra.org}