[Python-checkins] python/dist/src/Doc/ref ref2.tex,1.43,1.44

Tue, 24 Sep 2002 14:08:39 -0700

Update of /cvsroot/python/python/dist/src/Doc/ref
In directory usw-pr-cvs1:/tmp/cvs-serv21168/ref

Modified Files:
	ref2.tex 
Log Message:
Another try at clarifying what goes into and comes out of Unicode objects.

Index: ref2.tex
===================================================================
RCS file: /cvsroot/python/python/dist/src/Doc/ref/ref2.tex,v
retrieving revision 1.43
retrieving revision 1.44
diff -C2 -d -r1.43 -r1.44
*** ref2.tex	3 Sep 2002 11:52:43 -0000	1.43
--- ref2.tex	24 Sep 2002 21:08:37 -0000	1.44
***************
*** 411,437 ****
  \index{C}

! \begin{tableii}{l|l}{code}{Escape Sequence}{Meaning}
! \lineii{\e\var{newline}} {Ignored}
! \lineii{\e\e}	{Backslash (\code{\e})}
! \lineii{\e'}	{Single quote (\code{'})}
! \lineii{\e"}	{Double quote (\code{"})}
! \lineii{\e a}	{\ASCII{} Bell (BEL)}
! \lineii{\e b}	{\ASCII{} Backspace (BS)}
! \lineii{\e f}	{\ASCII{} Formfeed (FF)}
! \lineii{\e n}	{\ASCII{} Linefeed (LF)}
! \lineii{\e N\{\var{name}\}}
!        {Character named \var{name} in the Unicode database (Unicode only)}
! \lineii{\e r}	{\ASCII{} Carriage Return (CR)}
! \lineii{\e t}	{\ASCII{} Horizontal Tab (TAB)}
! \lineii{\e u\var{xxxx}}    {Character with 16-bit hex value \var{xxxx} (Unicode only)}
! \lineii{\e U\var{xxxxxxxx}}{Character with 32-bit hex value \var{xxxxxxxx} (Unicode only)}
! \lineii{\e v}	{\ASCII{} Vertical Tab (VT)}
! \lineii{\e\var{ooo}} {\ASCII{} character with octal value \var{ooo}}
! \lineii{\e x\var{hh}} {\ASCII{} character with hex value \var{hh}}
! \end{tableii}
  \index{ASCII@\ASCII}

! As in Standard C, up to three octal digits are accepted.  However,
! exactly two hex digits are taken in hex escapes.

  Unlike Standard \index{unrecognized escape sequence}C,
--- 411,456 ----
  \index{C}

! \begin{tableiii}{l|l|c}{code}{Escape Sequence}{Meaning}{Notes}
! \lineiii{\e\var{newline}} {Ignored}{}
! \lineiii{\e\e}	{Backslash (\code{\e})}{}
! \lineiii{\e'}	{Single quote (\code{'})}{}
! \lineiii{\e"}	{Double quote (\code{"})}{}
! \lineiii{\e a}	{\ASCII{} Bell (BEL)}{}
! \lineiii{\e b}	{\ASCII{} Backspace (BS)}{}
! \lineiii{\e f}	{\ASCII{} Formfeed (FF)}{}
! \lineiii{\e n}	{\ASCII{} Linefeed (LF)}{}
! \lineiii{\e N\{\var{name}\}}
!         {Character named \var{name} in the Unicode database (Unicode only)}{}
! \lineiii{\e r}	{\ASCII{} Carriage Return (CR)}{}
! \lineiii{\e t}	{\ASCII{} Horizontal Tab (TAB)}{}
! \lineiii{\e u\var{xxxx}}
!         {Character with 16-bit hex value \var{xxxx} (Unicode only)}{(1)}
! \lineiii{\e U\var{xxxxxxxx}}
!         {Character with 32-bit hex value \var{xxxxxxxx} (Unicode only)}{(2)}
! \lineiii{\e v}	{\ASCII{} Vertical Tab (VT)}{}
! \lineiii{\e\var{ooo}} {\ASCII{} character with octal value \var{ooo}}{(3)}
! \lineiii{\e x\var{hh}} {\ASCII{} character with hex value \var{hh}}{(4)}
! \end{tableiii}
  \index{ASCII@\ASCII}

! \noindent
! Notes:
! 
! \begin{itemize}
! \item[(1)]
!   Individual code units which form parts of a surrogate pair can be
!   encoded using this escape sequence.
! \item[(2)]
!   Any Unicode character can be encoded this way, but characters
!   outside the Basic Multilingual Plane (BMP) will be encoded using a
!   surrogate pair if Python is compiled to use 16-bit code units (the
!   default).  Individual code units which form parts of a surrogate
!   pair can be encoded using this escape sequence.
! \item[(3)]
!   As in Standard C, up to three octal digits are accepted.
! \item[(4)]
!   Unlike in Standard C, exactly two hex digits are required.
! \end{itemize}
! 

  Unlike Standard \index{unrecognized escape sequence}C,
***************
*** 461,470 ****
  with a \character{u} or \character{U} prefix, then the \code{\e uXXXX}
  escape sequence is processed while \emph{all other backslashes are
! left in the string}.  For example, the string literal \code{ur"\e
! u0062\e n"} consists of three Unicode characters: `LATIN SMALL LETTER
! B', `REVERSE SOLIDUS', and `LATIN SMALL LETTER N'.  Backslashes can be
! escaped with a preceding backslash; however, both remain in the
! string.  As a result, \code{\e uXXXX} escape sequences are only
! recognized when there are an odd number of backslashes.

  \subsection{String literal concatenation\label{string-catenation}}
--- 480,489 ----
  with a \character{u} or \character{U} prefix, then the \code{\e uXXXX}
  escape sequence is processed while \emph{all other backslashes are
! left in the string}.  For example, the string literal
! \code{ur"\e{}u0062\e n"} consists of three Unicode characters: `LATIN
! SMALL LETTER B', `REVERSE SOLIDUS', and `LATIN SMALL LETTER N'.
! Backslashes can be escaped with a preceding backslash; however, both
! remain in the string.  As a result, \code{\e uXXXX} escape sequences
! are only recognized when there are an odd number of backslashes.

  \subsection{String literal concatenation\label{string-catenation}}